zeux / meshoptimizer
1
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
2
#include "meshoptimizer.h"
3

4
#include <assert.h>
5
#include <string.h>
6

7
// This work is based on:
8
// Tom Forsyth. Linear-Speed Vertex Cache Optimisation. 2006
9
// Pedro Sander, Diego Nehab and Joshua Barczak. Fast Triangle Reordering for Vertex Locality and Reduced Overdraw. 2007
10
namespace meshopt
11
{
12

13
const size_t kCacheSizeMax = 16;
14
const size_t kValenceMax = 8;
15

16
struct VertexScoreTable
17
{
18
	float cache[1 + kCacheSizeMax];
19
	float live[1 + kValenceMax];
20
};
21

22
// Tuned to minimize the ACMR of a GPU that has a cache profile similar to NVidia and AMD
23
static const VertexScoreTable kVertexScoreTable = {
24
    {0.f, 0.779f, 0.791f, 0.789f, 0.981f, 0.843f, 0.726f, 0.847f, 0.882f, 0.867f, 0.799f, 0.642f, 0.613f, 0.600f, 0.568f, 0.372f, 0.234f},
25
    {0.f, 0.995f, 0.713f, 0.450f, 0.404f, 0.059f, 0.005f, 0.147f, 0.006f},
26
};
27

28
// Tuned to minimize the encoded index buffer size
29
static const VertexScoreTable kVertexScoreTableStrip = {
30
    {0.f, 1.000f, 1.000f, 1.000f, 0.453f, 0.561f, 0.490f, 0.459f, 0.179f, 0.526f, 0.000f, 0.227f, 0.184f, 0.490f, 0.112f, 0.050f, 0.131f},
31
    {0.f, 0.956f, 0.786f, 0.577f, 0.558f, 0.618f, 0.549f, 0.499f, 0.489f},
32
};
33

34
struct TriangleAdjacency
35
{
36
	unsigned int* counts;
37
	unsigned int* offsets;
38
	unsigned int* data;
39
};
40

41 3
static void buildTriangleAdjacency(TriangleAdjacency& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator)
42
{
43 3
	size_t face_count = index_count / 3;
44

45
	// allocate arrays
46 3
	adjacency.counts = allocator.allocate<unsigned int>(vertex_count);
47 3
	adjacency.offsets = allocator.allocate<unsigned int>(vertex_count);
48 3
	adjacency.data = allocator.allocate<unsigned int>(index_count);
49

50
	// fill triangle counts
51 3
	memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int));
52

53 3
	for (size_t i = 0; i < index_count; ++i)
54
	{
55 3
		assert(indices[i] < vertex_count);
56

57 3
		adjacency.counts[indices[i]]++;
58
	}
59

60
	// fill offset table
61 3
	unsigned int offset = 0;
62

63 3
	for (size_t i = 0; i < vertex_count; ++i)
64
	{
65 3
		adjacency.offsets[i] = offset;
66 3
		offset += adjacency.counts[i];
67
	}
68

69 3
	assert(offset == index_count);
70

71
	// fill triangle data
72 3
	for (size_t i = 0; i < face_count; ++i)
73
	{
74 3
		unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2];
75

76 3
		adjacency.data[adjacency.offsets[a]++] = unsigned(i);
77 3
		adjacency.data[adjacency.offsets[b]++] = unsigned(i);
78 3
		adjacency.data[adjacency.offsets[c]++] = unsigned(i);
79
	}
80

81
	// fix offsets that have been disturbed by the previous pass
82 3
	for (size_t i = 0; i < vertex_count; ++i)
83
	{
84 3
		assert(adjacency.offsets[i] >= adjacency.counts[i]);
85

86 3
		adjacency.offsets[i] -= adjacency.counts[i];
87
	}
88
}
89

90 3
static unsigned int getNextVertexDeadEnd(const unsigned int* dead_end, unsigned int& dead_end_top, unsigned int& input_cursor, const unsigned int* live_triangles, size_t vertex_count)
91
{
92
	// check dead-end stack
93 3
	while (dead_end_top)
94
	{
95 3
		unsigned int vertex = dead_end[--dead_end_top];
96

97 3
		if (live_triangles[vertex] > 0)
98 3
			return vertex;
99
	}
100

101
	// input order
102 3
	while (input_cursor < vertex_count)
103
	{
104 3
		if (live_triangles[input_cursor] > 0)
105 3
			return input_cursor;
106

107 3
		++input_cursor;
108
	}
109

110 3
	return ~0u;
111
}
112

113 3
static unsigned int getNextVertexNeighbour(const unsigned int* next_candidates_begin, const unsigned int* next_candidates_end, const unsigned int* live_triangles, const unsigned int* cache_timestamps, unsigned int timestamp, unsigned int cache_size)
114
{
115 3
	unsigned int best_candidate = ~0u;
116 3
	int best_priority = -1;
117

118 3
	for (const unsigned int* next_candidate = next_candidates_begin; next_candidate != next_candidates_end; ++next_candidate)
119
	{
120 3
		unsigned int vertex = *next_candidate;
121

122
		// otherwise we don't need to process it
123 3
		if (live_triangles[vertex] > 0)
124
		{
125 3
			int priority = 0;
126

127
			// will it be in cache after fanning?
128 3
			if (2 * live_triangles[vertex] + timestamp - cache_timestamps[vertex] <= cache_size)
129
			{
130 3
				priority = timestamp - cache_timestamps[vertex]; // position in cache
131
			}
132

133 3
			if (priority > best_priority)
134
			{
135 3
				best_candidate = vertex;
136 3
				best_priority = priority;
137
			}
138
		}
139
	}
140

141 3
	return best_candidate;
142
}
143

144 3
static float vertexScore(const VertexScoreTable* table, int cache_position, unsigned int live_triangles)
145
{
146 3
	assert(cache_position >= -1 && cache_position < int(kCacheSizeMax));
147

148 3
	unsigned int live_triangles_clamped = live_triangles < kValenceMax ? live_triangles : kValenceMax;
149

150 3
	return table->cache[1 + cache_position] + table->live[live_triangles_clamped];
151
}
152

153 3
static unsigned int getNextTriangleDeadEnd(unsigned int& input_cursor, const unsigned char* emitted_flags, size_t face_count)
154
{
155
	// input order
156 3
	while (input_cursor < face_count)
157
	{
158 3
		if (!emitted_flags[input_cursor])
159 3
			return input_cursor;
160

161 3
		++input_cursor;
162
	}
163

164 3
	return ~0u;
165
}
166

167
} // namespace meshopt
168

169 3
void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const meshopt::VertexScoreTable* table)
170
{
171
	using namespace meshopt;
172

173 3
	assert(index_count % 3 == 0);
174

175 3
	meshopt_Allocator allocator;
176

177
	// guard for empty meshes
178 3
	if (index_count == 0 || vertex_count == 0)
179 3
		return;
180

181
	// support in-place optimization
182 3
	if (destination == indices)
183
	{
184 3
		unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
185 3
		memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
186 3
		indices = indices_copy;
187
	}
188

189 3
	unsigned int cache_size = 16;
190 3
	assert(cache_size <= kCacheSizeMax);
191

192 3
	size_t face_count = index_count / 3;
193

194
	// build adjacency information
195 3
	TriangleAdjacency adjacency = {};
196 3
	buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);
197

198
	// live triangle counts
199 3
	unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count);
200 3
	memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int));
201

202
	// emitted flags
203 3
	unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count);
204 3
	memset(emitted_flags, 0, face_count);
205

206
	// compute initial vertex scores
207 3
	float* vertex_scores = allocator.allocate<float>(vertex_count);
208

209 3
	for (size_t i = 0; i < vertex_count; ++i)
210 3
		vertex_scores[i] = vertexScore(table, -1, live_triangles[i]);
211

212
	// compute triangle scores
213 3
	float* triangle_scores = allocator.allocate<float>(face_count);
214

215 3
	for (size_t i = 0; i < face_count; ++i)
216
	{
217 3
		unsigned int a = indices[i * 3 + 0];
218 3
		unsigned int b = indices[i * 3 + 1];
219 3
		unsigned int c = indices[i * 3 + 2];
220

221 3
		triangle_scores[i] = vertex_scores[a] + vertex_scores[b] + vertex_scores[c];
222
	}
223

224
	unsigned int cache_holder[2 * (kCacheSizeMax + 3)];
225 3
	unsigned int* cache = cache_holder;
226 3
	unsigned int* cache_new = cache_holder + kCacheSizeMax + 3;
227 3
	size_t cache_count = 0;
228

229 3
	unsigned int current_triangle = 0;
230 3
	unsigned int input_cursor = 1;
231

232 3
	unsigned int output_triangle = 0;
233

234 3
	while (current_triangle != ~0u)
235
	{
236 3
		assert(output_triangle < face_count);
237

238 3
		unsigned int a = indices[current_triangle * 3 + 0];
239 3
		unsigned int b = indices[current_triangle * 3 + 1];
240 3
		unsigned int c = indices[current_triangle * 3 + 2];
241

242
		// output indices
243 3
		destination[output_triangle * 3 + 0] = a;
244 3
		destination[output_triangle * 3 + 1] = b;
245 3
		destination[output_triangle * 3 + 2] = c;
246 3
		output_triangle++;
247

248
		// update emitted flags
249 3
		emitted_flags[current_triangle] = true;
250 3
		triangle_scores[current_triangle] = 0;
251

252
		// new triangle
253 3
		size_t cache_write = 0;
254 3
		cache_new[cache_write++] = a;
255 3
		cache_new[cache_write++] = b;
256 3
		cache_new[cache_write++] = c;
257

258
		// old triangles
259 3
		for (size_t i = 0; i < cache_count; ++i)
260
		{
261 3
			unsigned int index = cache[i];
262

263 3
			if (index != a && index != b && index != c)
264
			{
265 3
				cache_new[cache_write++] = index;
266
			}
267
		}
268

269 3
		unsigned int* cache_temp = cache;
270 3
		cache = cache_new, cache_new = cache_temp;
271 3
		cache_count = cache_write > cache_size ? cache_size : cache_write;
272

273
		// update live triangle counts
274 3
		live_triangles[a]--;
275 3
		live_triangles[b]--;
276 3
		live_triangles[c]--;
277

278
		// remove emitted triangle from adjacency data
279
		// this makes sure that we spend less time traversing these lists on subsequent iterations
280 3
		for (size_t k = 0; k < 3; ++k)
281
		{
282 3
			unsigned int index = indices[current_triangle * 3 + k];
283

284 3
			unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index];
285 3
			size_t neighbours_size = adjacency.counts[index];
286

287 3
			for (size_t i = 0; i < neighbours_size; ++i)
288
			{
289 3
				unsigned int tri = neighbours[i];
290

291 3
				if (tri == current_triangle)
292
				{
293 3
					neighbours[i] = neighbours[neighbours_size - 1];
294 3
					adjacency.counts[index]--;
295 3
					break;
296
				}
297
			}
298
		}
299

300 3
		unsigned int best_triangle = ~0u;
301 3
		float best_score = 0;
302

303
		// update cache positions, vertex scores and triangle scores, and find next best triangle
304 3
		for (size_t i = 0; i < cache_write; ++i)
305
		{
306 3
			unsigned int index = cache[i];
307

308 3
			int cache_position = i >= cache_size ? -1 : int(i);
309

310
			// update vertex score
311 3
			float score = vertexScore(table, cache_position, live_triangles[index]);
312 3
			float score_diff = score - vertex_scores[index];
313

314 3
			vertex_scores[index] = score;
315

316
			// update scores of vertex triangles
317 3
			const unsigned int* neighbours_begin = &adjacency.data[0] + adjacency.offsets[index];
318 3
			const unsigned int* neighbours_end = neighbours_begin + adjacency.counts[index];
319

320 3
			for (const unsigned int* it = neighbours_begin; it != neighbours_end; ++it)
321
			{
322 3
				unsigned int tri = *it;
323 3
				assert(!emitted_flags[tri]);
324

325 3
				float tri_score = triangle_scores[tri] + score_diff;
326 3
				assert(tri_score > 0);
327

328 3
				if (best_score < tri_score)
329
				{
330 3
					best_triangle = tri;
331 3
					best_score = tri_score;
332
				}
333

334 3
				triangle_scores[tri] = tri_score;
335
			}
336
		}
337

338
		// step through input triangles in order if we hit a dead-end
339 3
		current_triangle = best_triangle;
340

341 3
		if (current_triangle == ~0u)
342
		{
343 3
			current_triangle = getNextTriangleDeadEnd(input_cursor, &emitted_flags[0], face_count);
344
		}
345
	}
346

347 3
	assert(input_cursor == face_count);
348 3
	assert(output_triangle == face_count);
349
}
350

351 3
void meshopt_optimizeVertexCache(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count)
352
{
353 3
	meshopt_optimizeVertexCacheTable(destination, indices, index_count, vertex_count, &meshopt::kVertexScoreTable);
354
}
355

356 3
void meshopt_optimizeVertexCacheStrip(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count)
357
{
358 3
	meshopt_optimizeVertexCacheTable(destination, indices, index_count, vertex_count, &meshopt::kVertexScoreTableStrip);
359
}
360

361 3
void meshopt_optimizeVertexCacheFifo(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size)
362
{
363
	using namespace meshopt;
364

365 3
	assert(index_count % 3 == 0);
366 3
	assert(cache_size >= 3);
367

368 3
	meshopt_Allocator allocator;
369

370
	// guard for empty meshes
371 3
	if (index_count == 0 || vertex_count == 0)
372 3
		return;
373

374
	// support in-place optimization
375 3
	if (destination == indices)
376
	{
377 3
		unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count);
378 3
		memcpy(indices_copy, indices, index_count * sizeof(unsigned int));
379 3
		indices = indices_copy;
380
	}
381

382 3
	size_t face_count = index_count / 3;
383

384
	// build adjacency information
385 3
	TriangleAdjacency adjacency = {};
386 3
	buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator);
387

388
	// live triangle counts
389 3
	unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count);
390 3
	memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int));
391

392
	// cache time stamps
393 3
	unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count);
394 3
	memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int));
395

396
	// dead-end stack
397 3
	unsigned int* dead_end = allocator.allocate<unsigned int>(index_count);
398 3
	unsigned int dead_end_top = 0;
399

400
	// emitted flags
401 3
	unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count);
402 3
	memset(emitted_flags, 0, face_count);
403

404 3
	unsigned int current_vertex = 0;
405

406 3
	unsigned int timestamp = cache_size + 1;
407 3
	unsigned int input_cursor = 1; // vertex to restart from in case of dead-end
408

409 3
	unsigned int output_triangle = 0;
410

411 3
	while (current_vertex != ~0u)
412
	{
413 3
		const unsigned int* next_candidates_begin = &dead_end[0] + dead_end_top;
414

415
		// emit all vertex neighbours
416 3
		const unsigned int* neighbours_begin = &adjacency.data[0] + adjacency.offsets[current_vertex];
417 3
		const unsigned int* neighbours_end = neighbours_begin + adjacency.counts[current_vertex];
418

419 3
		for (const unsigned int* it = neighbours_begin; it != neighbours_end; ++it)
420
		{
421 3
			unsigned int triangle = *it;
422

423 3
			if (!emitted_flags[triangle])
424
			{
425 3
				unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2];
426

427
				// output indices
428 3
				destination[output_triangle * 3 + 0] = a;
429 3
				destination[output_triangle * 3 + 1] = b;
430 3
				destination[output_triangle * 3 + 2] = c;
431 3
				output_triangle++;
432

433
				// update dead-end stack
434 3
				dead_end[dead_end_top + 0] = a;
435 3
				dead_end[dead_end_top + 1] = b;
436 3
				dead_end[dead_end_top + 2] = c;
437 3
				dead_end_top += 3;
438

439
				// update live triangle counts
440 3
				live_triangles[a]--;
441 3
				live_triangles[b]--;
442 3
				live_triangles[c]--;
443

444
				// update cache info
445
				// if vertex is not in cache, put it in cache
446 3
				if (timestamp - cache_timestamps[a] > cache_size)
447 3
					cache_timestamps[a] = timestamp++;
448

449 3
				if (timestamp - cache_timestamps[b] > cache_size)
450 3
					cache_timestamps[b] = timestamp++;
451

452 3
				if (timestamp - cache_timestamps[c] > cache_size)
453 3
					cache_timestamps[c] = timestamp++;
454

455
				// update emitted flags
456 3
				emitted_flags[triangle] = true;
457
			}
458
		}
459

460
		// next candidates are the ones we pushed to dead-end stack just now
461 3
		const unsigned int* next_candidates_end = &dead_end[0] + dead_end_top;
462

463
		// get next vertex
464 3
		current_vertex = getNextVertexNeighbour(next_candidates_begin, next_candidates_end, &live_triangles[0], &cache_timestamps[0], timestamp, cache_size);
465

466 3
		if (current_vertex == ~0u)
467
		{
468 3
			current_vertex = getNextVertexDeadEnd(&dead_end[0], dead_end_top, input_cursor, &live_triangles[0], vertex_count);
469
		}
470
	}
471

472 3
	assert(output_triangle == face_count);
473
}

Read our documentation on viewing source code .

Loading