跳转至

Datagenerator 库使用手册

本库修改自 一扶苏一的数据生成库

Datagenerator 是一个专门用于生成测试数据的库,基于 C++17 编写。使用时只需将 datagenerator.hpp 与 generator.cpp 放在同一目录下,并引入该头文件即可。

代码

实现
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
#pragma once
#include <bits/stdc++.h>
namespace util {
    using ll = long long; ///< Shorthand alias for long long
    /// Shared 64-bit Mersenne Twister engine used by the random helpers in this
    /// namespace. It is seeded from std::random_device when the variable is
    /// initialised; call setSeed() to replace that seed with a fixed one.
    inline std::mt19937_64 rnd(std::random_device{}());
    /**
     * @brief Re-seed the shared random engine `rnd`
     *
     * Seeding with a fixed value makes every later draw from `rnd`
     * reproducible from one run to the next.
     * @param seed Value handed to the engine's seed() member
     */
    inline void setSeed(unsigned long long seed) {
        auto &engine = rnd;
        engine.seed(seed);
    }
    /**
     * @brief Draw a pseudo-random value from [0, x-1]
     *
     * The result is the remainder of one raw output of `rnd` divided by x.
     * @tparam T Integer type
     * @param x Exclusive upper bound; must be positive (checked with assert)
     * @return Value in [0, x-1]
     */
    template <typename T>
    inline T modx(T x) {
        assert(x > 0);
        const auto raw = rnd(); // exactly one engine output per call
        return raw % x;
    }
    /**
     * @brief Draw a pseudo-random value from [1, x]
     *
     * Implemented as modx(x) shifted up by one.
     * @tparam T Integer type
     * @param x Inclusive upper bound
     * @return Value in [1, x]
     */
    template <typename T>
    inline T mod1(T x) {
        const T zeroBased = modx(x);
        return zeroBased + 1;
    }
    /**
     * @brief Draw a pseudo-random value from the closed interval [l, r]
     * @tparam T Integer type
     * @param l Inclusive lower bound
     * @param r Inclusive upper bound
     * @return Value in [l, r]
     */
    template <typename T>
    inline T rangeRand(T l, T r) {
        const auto width = r - l + 1; // number of admissible values
        return l + modx(width);
    }
    /**
     * @brief Generate a random sub-interval [l, r] of [L, R]
     *
     * Both endpoints are drawn independently with rangeRand(L, R) and then
     * put in non-decreasing order, so l == r is possible.
     * @tparam T Integer type
     * @param L Inclusive lower bound of the enclosing interval
     * @param R Inclusive upper bound of the enclosing interval
     * @return Pair (l, r) with L <= l <= r <= R
     */
    template <typename T>
    inline std::pair<T, T> randRange(T L, T R) {
        // Two separate draws, taken in this order.
        const T first = rangeRand(L, R);
        const T second = rangeRand(L, R);
        return std::make_pair(std::min(first, second), std::max(first, second));
    }
    /**
     * @brief Pick a random lowercase letter
     * @return Character obtained by adding a value in [0, 25] to 'a'
     */
    inline char randChar() {
        const int offset = modx(26);
        return 'a' + offset;
    }
    /**
     * @brief Build an array by calling a generator function repeatedly
     *
     * genFunc(lim) is invoked len times, in order, and the results are
     * collected. A non-positive len yields an empty vector.
     * @tparam T Element type
     * @param len Number of elements to generate
     * @param lim Argument passed to every genFunc call
     * @param genFunc Generator invoked once per element
     * @return Vector holding the generated values in call order
     */
    template <typename T>
    inline std::vector<T> genArr(int len, T lim, std::function<T(T)> genFunc) {
        std::vector<T> ret;
        // Allocate once up front; the guard keeps a negative len from being
        // converted to a huge unsigned size.
        if (len > 0) ret.reserve(static_cast<std::size_t>(len));
        for (int i = 0; i < len; ++i) ret.push_back(genFunc(lim));
        return ret;
    }
    /**
     * @brief Build a string from len generated characters
     * @param len Number of characters; a non-positive value gives an empty string
     * @param genChar Callable producing one character per call (defaults to randChar)
     * @return String of the generated characters in call order
     */
    inline std::string genStr(int len, std::function<char()> genChar = randChar) {
        std::string ret;
        std::generate_n(std::back_inserter(ret), len, genChar);
        return ret;
    }
    /**
     * @brief Pick one element of a vector at random
     * @tparam T Element type
     * @param a Vector to pick from; must not be empty (modx asserts a positive size)
     * @return Copy of the element at a random index
     */
    template <typename T>
    T choice(const std::vector<T> &a) {
        const auto pickedIndex = modx(a.size());
        return a[pickedIndex];
    }
    /**
     * @brief Pick one element of a std::array at random
     * @tparam T Element type
     * @tparam N Array size; must be positive (modx asserts a positive size)
     * @param a Array to pick from
     * @return Copy of the element at a random index
     */
    template <typename T, size_t N>
    T choice(const std::array<T, N> &a) {
        const auto pickedIndex = modx(a.size());
        return a[pickedIndex];
    }
    /**
     * @brief Randomly permute a vector in place
     *
     * Uses std::shuffle driven by the shared engine `rnd`.
     * @tparam T Element type
     * @param a Vector whose elements are reordered
     */
    template <typename T>
    void shuffleVec(std::vector<T> &a) {
        std::shuffle(std::begin(a), std::end(a), rnd);
    }
    /**
     * @brief Write the elements of a vector to std::cout
     *
     * Elements are separated by `split`; `end` is written once after the
     * last element (and also when the vector is empty).
     * @tparam T Element type (must be streamable to std::cout)
     * @param arr Vector to print
     * @param split Text written between two consecutive elements
     * @param end Text written after the last element
     */
    template <typename T>
    void printArr(const std::vector<T> &arr, const std::string &split = " ", const std::string &end = "\n") {
        bool isFirst = true;
        for (const auto &item : arr) {
            if (!isFirst) std::cout << split; // separator goes before every element but the first
            std::cout << item;
            isFirst = false;
        }
        std::cout << end;
    }
    /**
     * @brief Write all arguments to std::cout on one line
     *
     * Arguments are printed left to right, separated by a single space,
     * and followed by a '\n'.
     * @tparam First Type of the first argument
     * @tparam Args Types of the remaining arguments
     * @param first First value to print
     * @param args Remaining values to print
     */
    template <typename First, typename... Args>
    void println(const First &first, Args &&...args) {
        std::cout << first;
        // Comma fold: evaluates strictly left to right, one " value" per argument.
        ((std::cout << " " << std::forward<Args>(args)), ...);
        std::cout << '\n';
    }
    /**
     * @brief Create a directory (and any missing parents) if it does not exist
     *
     * Nothing happens when dirName already names a directory. A message is
     * written to std::cerr only when a directory was actually created.
     * Failures (for example, the path exists but is not a directory) are
     * reported by std::filesystem as std::filesystem::filesystem_error.
     * @param dirName Path of the directory to create
     */
    inline void mkdir(const std::string &dirName) {
        namespace fs = std::filesystem;
        // create_directories() returns true only if it created something, so
        // the log line can no longer claim a creation that did not happen.
        if (fs::create_directories(dirName)) {
            std::cerr << "Created directory: " << dirName << "\n";
        }
    }
    // Graph-theory utilities live in the nested namespace graph
    namespace graph {
        /**
         * @brief Trait detecting a std::pair of two identical integral types
         *
         * `value` is true for std::pair<I, I> where std::is_integral<I> holds
         * (int, long long, ...), and false for every other type, including
         * pairs with mismatched or non-integral members.
         */
        template <typename T>
        struct IsIntegerPair : std::false_type {};
        template <typename Int>
        struct IsIntegerPair<std::pair<Int, Int>> : std::is_integral<Int> {};
        /**
         * @brief Edge-list container with printing helpers for generated graphs
         *
         * The class only records a vertex count and a list of (u, v, w)
         * triples; it does not validate vertex ids or interpret direction.
         * @tparam WeightType Weight type (use int for unweighted graphs)
         */
        template <typename WeightType = int>
        class Graph {
        private:
            int vertexCount;                                        ///< Vertex count given at construction
            int edgeCount;                                          ///< Number of addEdge() calls so far
            std::vector<std::tuple<int, int, WeightType>> edgeList; ///< Edges as (u, v, w), in insertion order
        public:
            /**
             * @brief Create a graph without edges
             * @param n Number of vertices
             */
            Graph(int n = 0) : vertexCount(n), edgeCount(0) {}
            /**
             * @brief Append one edge to the edge list
             * @param fromVertex First endpoint (generators in this file use 1-based ids)
             * @param toVertex Second endpoint
             * @param weight Edge weight, defaults to WeightType{1}
             */
            void addEdge(int fromVertex, int toVertex, WeightType weight = WeightType{1}) {
                edgeList.emplace_back(fromVertex, toVertex, weight);
                edgeCount += 1;
            }
            /// @return Number of vertices
            int getVertexCount() const { return vertexCount; }
            /// @return Number of edges added so far
            int getEdgeCount() const { return edgeCount; }
            /// @return Read-only view of the stored (u, v, w) triples
            const std::vector<std::tuple<int, int, WeightType>> &getEdgeList() const { return edgeList; }
            /**
             * @brief Print one line per edge, without any header line
             * @param printWeights true prints "u v w", false prints "u v"
             */
            void print(bool printWeights = true) const {
                for (const auto &[from, to, weight] : edgeList) {
                    if (!printWeights) {
                        println(from, to);
                        continue;
                    }
                    println(from, to, weight);
                }
            }
            /**
             * @brief Print a "vertexCount edgeCount" header line followed by the edges
             * @param printWeights Forwarded to print()
             */
            void printWithInfo(bool printWeights = true) const {
                println(vertexCount, edgeCount);
                print(printWeights);
            }
            /**
             * @brief Print a header line holding only the vertex count, then the edges
             * @param printWeights Forwarded to print()
             */
            void printTree(bool printWeights = true) const {
                // Tree format: the edge count is omitted from the header.
                std::cout << vertexCount << "\n";
                print(printWeights);
            }
            /**
             * @brief Print parentArray[1] .. parentArray[size-1] on one line
             *
             * Values are separated by single spaces and followed by a newline;
             * element 0 is never printed. This matches an array in which
             * parentArray[i] is the parent of vertex i+1 (output f2, f3, ..., fn).
             * @param parentArray Parent array to print
             * @param rootVertex Currently not used by the implementation
             */
            static void printParentArray(const std::vector<int> &parentArray, int rootVertex = 1) {
                const int total = static_cast<int>(parentArray.size());
                for (int idx = 1; idx < total; ++idx) {
                    std::cout << parentArray[idx];
                    if (idx + 1 < total) std::cout << " ";
                }
                std::cout << "\n";
            }
        };
        /**
         * @brief Produce one edge weight from a flexible "weight generator"
         *
         * The generator is interpreted by the first rule that matches:
         *  1. a pair accepted by IsIntegerPair: random value in [first, second];
         *  2. a callable taking (int, int): called with the two endpoints;
         *  3. a callable taking no arguments: called as is;
         *  4. anything else: returned as a constant (converted to WeightType).
         * @tparam WeightType Weight type
         * @param weightGenerator Pair, callable or constant, as described above
         * @param u First endpoint, passed to two-argument callables
         * @param v Second endpoint, passed to two-argument callables
         * @return Generated weight
         */
        template <typename WeightType, typename Func>
        WeightType generateWeight(Func &&weightGenerator, int u = 0, int v = 0) {
            using Generator = std::decay_t<Func>;
            if constexpr (IsIntegerPair<Generator>::value) {
                // Inclusive [min, max] range
                const auto &[lowest, highest] = weightGenerator;
                return rangeRand(lowest, highest);
            } else if constexpr (std::is_invocable_r_v<WeightType, Func, int, int>) {
                // Endpoint-aware callable
                return weightGenerator(u, v);
            } else if constexpr (std::is_invocable_r_v<WeightType, Func>) {
                // Nullary callable
                return weightGenerator();
            } else {
                // Plain constant
                return weightGenerator;
            }
        }
        /**
         * @brief Generate a random tree
         *
         * With requireParentLess every vertex i >= 2 is attached to a random
         * vertex in [1, i-1], so vertex 1 is the root and rootVertex is not
         * consulted. Otherwise the non-root vertices are shuffled and each one
         * is attached to a random vertex that is already part of the tree.
         * Edges are stored as (parent, child). A non-positive vertexCount
         * yields a graph without edges.
         * @param vertexCount Number of vertices
         * @param rootVertex Root vertex (1-indexed), used when requireParentLess is false
         * @param requireParentLess Whether to require parent < child
         * @param weightGenerator Weight generation function, pair, or constant value
         *        (defaults to the constant WeightType{1})
         * @param allowMultiEdges Not used for trees
         * @param allowSelfLoop Not used for trees
         * @param ensureConnected Not used for trees (a tree is always connected)
         * @return Graph<WeightType> Generated tree
         * @throws std::invalid_argument if requireParentLess is false and
         *         rootVertex is outside [1, vertexCount]
         */
        // Func has a default type so that the default weightGenerator is usable:
        // a template parameter cannot be deduced from a default argument.
        template <typename WeightType = int, typename Func = WeightType>
        Graph<WeightType> genTree(int vertexCount, int rootVertex = 1, bool requireParentLess = false,
                                  Func &&weightGenerator = WeightType{1},
                                  bool allowMultiEdges = false, bool allowSelfLoop = false, bool ensureConnected = true) {
            Graph<WeightType> tree(vertexCount);
            if (vertexCount <= 0) return tree;
            if (requireParentLess) {
                // Each vertex i has parent in [1, i-1]
                for (int i = 2; i <= vertexCount; ++i) {
                    const int parent = rangeRand(1, i - 1);
                    WeightType weight = generateWeight<WeightType>(weightGenerator, parent, i);
                    tree.addEdge(parent, i, weight);
                }
                return tree;
            }
            // Erasing the result of a failed std::find would be undefined
            // behaviour, so reject an invalid root explicitly.
            if (rootVertex < 1 || rootVertex > vertexCount) {
                throw std::invalid_argument("genTree: rootVertex must be in [1, vertexCount]");
            }
            std::vector<int> vertices(vertexCount);
            for (int i = 0; i < vertexCount; ++i) vertices[i] = i + 1;
            shuffleVec(vertices);
            // The root is already "in the tree"; only the others need a parent.
            vertices.erase(std::find(vertices.begin(), vertices.end(), rootVertex));
            std::vector<int> connectedVertices = {rootVertex};
            connectedVertices.reserve(vertexCount);
            for (int currentVertex : vertices) {
                const int parentVertex = choice(connectedVertices);
                WeightType weight = generateWeight<WeightType>(weightGenerator, parentVertex, currentVertex);
                tree.addEdge(parentVertex, currentVertex, weight);
                connectedVertices.push_back(currentVertex);
            }
            return tree;
        }
        /**
         * @brief Generate a chain (path graph) 1 - 2 - ... - vertexCount
         *
         * Produces the edges (i, i+1) for i in [1, vertexCount-1]; fewer than
         * two vertices give a graph without edges.
         * @param vertexCount Number of vertices
         * @param weightGenerator Weight generation function, pair, or constant value
         *        (defaults to the constant WeightType{1})
         * @param allowMultiEdges Not used for chains
         * @param allowSelfLoop Not used for chains
         * @param ensureConnected Not used for chains (a chain is always connected)
         * @return Graph<WeightType> Generated chain
         */
        // Func has a default type so that the default weightGenerator is usable:
        // a template parameter cannot be deduced from a default argument.
        template <typename WeightType = int, typename Func = WeightType>
        Graph<WeightType> genChain(int vertexCount, Func &&weightGenerator = WeightType{1},
                                   bool allowMultiEdges = false, bool allowSelfLoop = false, bool ensureConnected = true) {
            Graph<WeightType> chain(vertexCount);
            for (int i = 1; i < vertexCount; ++i) {
                WeightType weight = generateWeight<WeightType>(weightGenerator, i, i + 1);
                chain.addEdge(i, i + 1, weight);
            }
            return chain;
        }
        /**
         * @brief Generate a star graph
         *
         * Every vertex other than the center gets one edge (center, vertex),
         * emitted in increasing vertex order.
         * @param vertexCount Number of vertices
         * @param centerVertex Center vertex (1-indexed), 0 for a random center
         * @param weightGenerator Weight generation function, pair, or constant value
         *        (defaults to the constant WeightType{1})
         * @param allowMultiEdges Not used for stars
         * @param allowSelfLoop Not used for stars
         * @param ensureConnected Not used for stars (a star is always connected)
         * @return Graph<WeightType> Generated star
         */
        // Func has a default type so that the default weightGenerator is usable:
        // a template parameter cannot be deduced from a default argument.
        template <typename WeightType = int, typename Func = WeightType>
        Graph<WeightType> genStar(int vertexCount, int centerVertex = 0,
                                  Func &&weightGenerator = WeightType{1},
                                  bool allowMultiEdges = false, bool allowSelfLoop = false, bool ensureConnected = true) {
            // Only draw a random center when there is a vertex to pick;
            // mod1(0) would violate modx's x > 0 precondition.
            if (centerVertex == 0 && vertexCount > 0) centerVertex = mod1(vertexCount);
            Graph<WeightType> star(vertexCount);
            for (int i = 1; i <= vertexCount; ++i) {
                if (i == centerVertex) continue;
                WeightType weight = generateWeight<WeightType>(weightGenerator, centerVertex, i);
                star.addEdge(centerVertex, i, weight);
            }
            return star;
        }
        /**
         * @brief Generate a cycle graph (single cycle) 1 - 2 - ... - n - 1
         *
         * Emits the chain edges (i, i+1) followed by the closing edge
         * (vertexCount, 1). Degenerate sizes: a non-positive vertexCount gives
         * no edges, vertexCount == 1 gives the self loop (1, 1), and
         * vertexCount == 2 gives the two edges (1, 2) and (2, 1).
         * @param vertexCount Number of vertices
         * @param weightGenerator Weight generation function, pair, or constant value
         *        (defaults to the constant WeightType{1})
         * @param allowMultiEdges Not used for cycles
         * @param allowSelfLoop Not used for cycles
         * @param ensureConnected Not used for cycles (a cycle is always connected)
         * @return Graph<WeightType> Generated cycle
         */
        // Func has a default type so that the default weightGenerator is usable:
        // a template parameter cannot be deduced from a default argument.
        template <typename WeightType = int, typename Func = WeightType>
        Graph<WeightType> genCycle(int vertexCount, Func &&weightGenerator = WeightType{1},
                                   bool allowMultiEdges = false, bool allowSelfLoop = false, bool ensureConnected = true) {
            Graph<WeightType> cycle(vertexCount);
            for (int i = 1; i < vertexCount; ++i) {
                WeightType weight = generateWeight<WeightType>(weightGenerator, i, i + 1);
                cycle.addEdge(i, i + 1, weight);
            }
            // Close the cycle. Skipped for an empty graph, which would
            // otherwise receive an edge touching the non-existent vertex 0.
            if (vertexCount >= 1) {
                WeightType weight = generateWeight<WeightType>(weightGenerator, vertexCount, 1);
                cycle.addEdge(vertexCount, 1, weight);
            }
            return cycle;
        }
        /**
         * @brief Generate a complete graph
         *
         * Emits one edge (i, j) for every pair 1 <= i < j <= vertexCount, in
         * lexicographic order. No self loops are produced.
         * @param vertexCount Number of vertices
         * @param weightGenerator Weight generation function, pair, or constant value
         *        (defaults to the constant WeightType{1})
         * @param allowMultiEdges Not used for complete graphs
         * @param allowSelfLoop Currently not consulted by the implementation
         * @param ensureConnected Not used for complete graphs
         * @return Graph<WeightType> Generated complete graph
         */
        // Func has a default type so that the default weightGenerator is usable:
        // a template parameter cannot be deduced from a default argument.
        template <typename WeightType = int, typename Func = WeightType>
        Graph<WeightType> genCompleteGraph(int vertexCount, Func &&weightGenerator = WeightType{1},
                                           bool allowMultiEdges = false, bool allowSelfLoop = false, bool ensureConnected = true) {
            Graph<WeightType> complete(vertexCount);
            for (int i = 1; i <= vertexCount; ++i) {
                for (int j = i + 1; j <= vertexCount; ++j) {
                    WeightType weight = generateWeight<WeightType>(weightGenerator, i, j);
                    complete.addEdge(i, j, weight);
                }
            }
            return complete;
        }
        /**
         * @brief Generate a random bipartite graph
         *
         * Left vertices are 1..leftCount, right vertices are
         * leftCount+1..leftCount+rightCount; every edge is stored as
         * (left, right). If either side is empty no edge can exist and the
         * graph is returned without edges.
         *
         * With ensureConnected a random spanning tree that respects the
         * bipartition is built first (leftCount + rightCount - 1 edges, even
         * if that exceeds edgeCount). Random edges are then added until
         * edgeCount is reached. When allowMultiEdges is false the target is
         * capped at leftCount * rightCount so the generator always terminates.
         * @param leftCount Number of vertices in left partition
         * @param rightCount Number of vertices in right partition
         * @param edgeCount Requested number of edges between partitions
         * @param weightGenerator Weight generation function, pair, or constant value
         *        (defaults to the constant WeightType{1})
         * @param allowMultiEdges Whether to allow multiple edges
         * @param allowSelfLoop Not used (a bipartite graph has no self loops)
         * @param ensureConnected Whether to ensure graph is connected
         * @return Graph<WeightType> Generated bipartite graph
         */
        // Func has a default type so that the default weightGenerator is usable:
        // a template parameter cannot be deduced from a default argument.
        template <typename WeightType = int, typename Func = WeightType>
        Graph<WeightType> genBipartiteGraph(int leftCount, int rightCount, int edgeCount,
                                            Func &&weightGenerator = WeightType{1},
                                            bool allowMultiEdges = false, bool allowSelfLoop = false, bool ensureConnected = false) {
            const int vertexCount = leftCount + rightCount;
            Graph<WeightType> bipartite(vertexCount);
            if (leftCount <= 0 || rightCount <= 0) return bipartite;
            std::set<std::pair<int, int>> existingEdges;
            // Records and stores the edge (u in left, v in right).
            const auto addCrossEdge = [&](int u, int v) {
                existingEdges.insert({u, v});
                WeightType weight = generateWeight<WeightType>(weightGenerator, u, v);
                bipartite.addEdge(u, v, weight);
            };
            if (ensureConnected) {
                // Seed the tree with one left-right edge so that both sides
                // have an attached vertex, then hook every other vertex onto
                // a random already-attached vertex of the opposite side.
                const int seedLeft = mod1(leftCount);
                const int seedRight = leftCount + mod1(rightCount);
                addCrossEdge(seedLeft, seedRight);
                std::vector<int> attachedLeft = {seedLeft};
                std::vector<int> attachedRight = {seedRight};
                std::vector<int> pending;
                for (int vertex = 1; vertex <= vertexCount; ++vertex) {
                    if (vertex != seedLeft && vertex != seedRight) pending.push_back(vertex);
                }
                shuffleVec(pending);
                for (int vertex : pending) {
                    if (vertex <= leftCount) {
                        addCrossEdge(vertex, choice(attachedRight));
                        attachedLeft.push_back(vertex);
                    } else {
                        addCrossEdge(choice(attachedLeft), vertex);
                        attachedRight.push_back(vertex);
                    }
                }
            }
            // Without multi edges at most leftCount * rightCount edges exist;
            // capping the target keeps the rejection loop from spinning forever.
            long long targetEdges = edgeCount;
            if (!allowMultiEdges) {
                targetEdges = std::min(targetEdges, 1LL * leftCount * rightCount);
            }
            while (bipartite.getEdgeCount() < targetEdges) {
                const int u = mod1(leftCount);              // Left partition: 1..leftCount
                const int v = leftCount + mod1(rightCount); // Right partition: leftCount+1..leftCount+rightCount
                if (!allowMultiEdges && existingEdges.count({u, v})) continue;
                addCrossEdge(u, v);
            }
            return bipartite;
        }
        /**
         * @brief Generate a random undirected graph
         *
         * With ensureConnected a random spanning tree rooted at vertex 1 is
         * generated first (vertexCount - 1 edges, even if that exceeds
         * edgeCount). Random edges are then added until edgeCount is reached.
         * The target is capped at the number of edges that can exist under the
         * given flags, so the generator always terminates:
         * n(n-1)/2 (+ n with self loops) when multi edges are forbidden, and 0
         * when only self loops would be possible but they are forbidden.
         * A non-positive vertexCount yields a graph without edges.
         * @param vertexCount Number of vertices
         * @param edgeCount Requested number of edges
         * @param weightGenerator Weight generation function, pair, or constant value
         *        (defaults to the constant WeightType{1})
         * @param allowMultiEdges Whether to allow multiple edges
         * @param allowSelfLoop Whether to allow self loops
         * @param ensureConnected Whether to ensure graph is connected
         * @return Graph<WeightType> Generated undirected graph
         */
        // Func has a default type so that the default weightGenerator is usable:
        // a template parameter cannot be deduced from a default argument.
        template <typename WeightType = int, typename Func = WeightType>
        Graph<WeightType> genUndirectedGraph(int vertexCount, int edgeCount,
                                             Func &&weightGenerator = WeightType{1},
                                             bool allowMultiEdges = false, bool allowSelfLoop = false, bool ensureConnected = false) {
            Graph<WeightType> graph(vertexCount);
            if (vertexCount <= 0) return graph;
            std::set<std::pair<int, int>> existingEdges;
            // If ensureConnected is true, first generate a spanning tree
            if (ensureConnected) {
                auto spanningTree = genTree<WeightType>(vertexCount, 1, false, weightGenerator);
                for (const auto &edge : spanningTree.getEdgeList()) {
                    const int u = std::get<0>(edge), v = std::get<1>(edge);
                    graph.addEdge(u, v, std::get<2>(edge));
                    if (!allowMultiEdges) {
                        existingEdges.insert({std::min(u, v), std::max(u, v)});
                    }
                }
            }
            // Cap the target at what the flags allow; otherwise the rejection
            // loop below could never finish.
            const long long n = vertexCount;
            long long targetEdges = edgeCount;
            if (!allowMultiEdges) {
                targetEdges = std::min(targetEdges, n * (n - 1) / 2 + (allowSelfLoop ? n : 0));
            } else if (n < 2 && !allowSelfLoop) {
                targetEdges = 0;
            }
            // Add remaining edges
            while (graph.getEdgeCount() < targetEdges) {
                const int u = mod1(vertexCount);
                const int v = mod1(vertexCount);
                if (!allowSelfLoop && u == v) continue;
                // An undirected edge is identified by its unordered endpoint pair.
                if (!allowMultiEdges && !existingEdges.insert({std::min(u, v), std::max(u, v)}).second) continue;
                WeightType weight = generateWeight<WeightType>(weightGenerator, u, v);
                graph.addEdge(u, v, weight);
            }
            return graph;
        }
        /**
         * @brief Generate a random directed graph
         *
         * With ensureConnected a random spanning tree rooted at vertex 1 is
         * generated first and its edges are added as parent -> child
         * (vertexCount - 1 edges, even if that exceeds edgeCount). Random
         * edges are then added until edgeCount is reached. The target is
         * capped at the number of edges that can exist under the given flags,
         * so the generator always terminates: n(n-1) (+ n with self loops)
         * when multi edges are forbidden, and 0 when only self loops would be
         * possible but they are forbidden.
         * A non-positive vertexCount yields a graph without edges.
         * @param vertexCount Number of vertices
         * @param edgeCount Requested number of edges
         * @param weightGenerator Weight generation function, pair, or constant value
         *        (defaults to the constant WeightType{1})
         * @param allowMultiEdges Whether to allow multiple edges
         * @param allowSelfLoop Whether to allow self loops
         * @param ensureConnected Whether to add the spanning tree described above
         * @return Graph<WeightType> Generated directed graph
         */
        // Func has a default type so that the default weightGenerator is usable:
        // a template parameter cannot be deduced from a default argument.
        template <typename WeightType = int, typename Func = WeightType>
        Graph<WeightType> genDirectedGraph(int vertexCount, int edgeCount,
                                           Func &&weightGenerator = WeightType{1},
                                           bool allowMultiEdges = false, bool allowSelfLoop = false, bool ensureConnected = false) {
            Graph<WeightType> graph(vertexCount);
            if (vertexCount <= 0) return graph;
            std::set<std::pair<int, int>> existingEdges;
            // If ensureConnected is true, first generate a spanning tree (as directed edges)
            if (ensureConnected) {
                auto spanningTree = genTree<WeightType>(vertexCount, 1, false, weightGenerator);
                for (const auto &edge : spanningTree.getEdgeList()) {
                    const int u = std::get<0>(edge), v = std::get<1>(edge);
                    graph.addEdge(u, v, std::get<2>(edge));
                    if (!allowMultiEdges) {
                        existingEdges.insert({u, v});
                    }
                }
            }
            // Cap the target at what the flags allow; otherwise the rejection
            // loop below could never finish.
            const long long n = vertexCount;
            long long targetEdges = edgeCount;
            if (!allowMultiEdges) {
                targetEdges = std::min(targetEdges, n * (n - 1) + (allowSelfLoop ? n : 0));
            } else if (n < 2 && !allowSelfLoop) {
                targetEdges = 0;
            }
            // Add remaining edges
            while (graph.getEdgeCount() < targetEdges) {
                const int u = mod1(vertexCount);
                const int v = mod1(vertexCount);
                if (!allowSelfLoop && u == v) continue;
                // insert() reports whether the ordered pair was new.
                if (!allowMultiEdges && !existingEdges.insert({u, v}).second) continue;
                WeightType weight = generateWeight<WeightType>(weightGenerator, u, v);
                graph.addEdge(u, v, weight);
            }
            return graph;
        }
        /**
         * @brief Generate a Directed Acyclic Graph (DAG)
         *
         * A random permutation of the vertices serves as topological order;
         * every edge goes from an earlier to a later position, which rules
         * out cycles and self loops. With ensureConnected each vertex after
         * the first is given one incoming edge from a random earlier vertex
         * (vertexCount - 1 edges, even if that exceeds edgeCount). Random
         * forward edges are then added until edgeCount is reached. The target
         * is capped so the generator always terminates: n(n-1)/2 when multi
         * edges are forbidden, and 0 when fewer than two vertices exist.
         * A non-positive vertexCount yields a graph without edges.
         * @param vertexCount Number of vertices
         * @param edgeCount Requested number of edges
         * @param weightGenerator Weight generation function, pair, or constant value
         *        (defaults to the constant WeightType{1})
         * @param allowMultiEdges Whether to allow multiple edges
         * @param allowSelfLoop Not used (a DAG has no self loops)
         * @param ensureConnected Whether to add the spanning tree described above
         * @return Graph<WeightType> Generated DAG
         */
        // Func has a default type so that the default weightGenerator is usable:
        // a template parameter cannot be deduced from a default argument.
        template <typename WeightType = int, typename Func = WeightType>
        Graph<WeightType> genDAG(int vertexCount, int edgeCount,
                                 Func &&weightGenerator = WeightType{1},
                                 bool allowMultiEdges = false, bool allowSelfLoop = false, bool ensureConnected = false) {
            Graph<WeightType> dag(vertexCount);
            if (vertexCount <= 0) return dag;
            std::vector<int> topologicalOrder(vertexCount);
            for (int i = 0; i < vertexCount; ++i) topologicalOrder[i] = i + 1;
            shuffleVec(topologicalOrder);
            std::set<std::pair<int, int>> existingEdges;
            // If ensureConnected is true, first generate a spanning tree that respects topological order
            if (ensureConnected) {
                for (int i = 1; i < vertexCount; ++i) {
                    const int parentIndex = modx(i); // any earlier position
                    const int u = topologicalOrder[parentIndex];
                    const int v = topologicalOrder[i];
                    WeightType weight = generateWeight<WeightType>(weightGenerator, u, v);
                    dag.addEdge(u, v, weight);
                    if (!allowMultiEdges) {
                        existingEdges.insert({u, v});
                    }
                }
            }
            // Cap the target at what can exist; otherwise the rejection loop
            // below could never finish.
            const long long n = vertexCount;
            long long targetEdges = edgeCount;
            if (!allowMultiEdges) {
                targetEdges = std::min(targetEdges, n * (n - 1) / 2);
            } else if (n < 2) {
                targetEdges = 0;
            }
            // Add remaining edges, ensuring they respect topological order
            while (dag.getEdgeCount() < targetEdges) {
                const int i = modx(vertexCount);
                const int j = modx(vertexCount);
                if (i >= j) continue; // Ensure edge goes from earlier to later in topological order
                const int u = topologicalOrder[i], v = topologicalOrder[j];
                // insert() reports whether the ordered pair was new.
                if (!allowMultiEdges && !existingEdges.insert({u, v}).second) continue;
                WeightType weight = generateWeight<WeightType>(weightGenerator, u, v);
                dag.addEdge(u, v, weight);
            }
            return dag;
        }
        /**
         * @brief Generate a graph that challenges SPFA algorithm
         *
         * The graph is assembled in three stages:
         *   1. a chain of edges (i, i + 1) for i = 1 .. vertexCount - 1,
         *   2. "backward" edges (u, v) with v < u, weighted by negativeWeightGenerator,
         *   3. random edges between arbitrary vertices, weighted by weightGenerator.
         * If edgeCount is smaller than vertexCount - 1, only the first edgeCount
         * chain edges are produced.
         *
         * @param vertexCount Number of vertices (vertices are numbered from 1)
         * @param edgeCount Number of edges
         * @param weightGenerator Weight generation function for normal edges
         * @param negativeWeightGenerator Weight generation function for negative edges
         * @param negativeRatio Ratio of negative weight edges among the non-chain edges;
         *        the resulting count is clamped to [0, number of non-chain edges]
         * @param allowMultiEdges Whether to allow multiple edges
         *        (NOTE: currently not consulted; duplicate edges are not filtered)
         * @param allowSelfLoop Whether to allow self loops
         * @param ensureConnected Whether to ensure graph is connected
         *        (NOTE: currently not consulted; the chain is always built)
         * @return Graph<WeightType> Generated SPFA-challenging graph
         */
        // Func1/Func2 need default template arguments: a template parameter cannot be
        // deduced from a default function argument, so without them the defaults below
        // could never be used.
        template <typename WeightType = int, typename Func1 = WeightType, typename Func2 = WeightType (*)()>
        Graph<WeightType> genSpfaKiller(int vertexCount, int edgeCount, Func1 &&weightGenerator = WeightType{1}, Func2 &&negativeWeightGenerator = [] { return WeightType{-1}; }, double negativeRatio = 0.1, bool allowMultiEdges = false, bool allowSelfLoop = false, bool ensureConnected = true) {
            // Accepted for interface compatibility; see the notes in the documentation above.
            (void)allowMultiEdges;
            (void)ensureConnected;
            Graph<WeightType> graph(vertexCount);
            // Build the chain, truncated when there are not enough edges for a full one
            const int chainEdgeCount = std::min(edgeCount, vertexCount - 1);
            for (int i = 1; i <= chainEdgeCount; ++i) {
                WeightType weight = generateWeight<WeightType>(weightGenerator, i, i + 1);
                graph.addEdge(i, i + 1, weight);
            }
            const int remainingEdges = edgeCount - (vertexCount - 1);
            if (remainingEdges <= 0) return graph;
            // With fewer than two vertices every possible edge is a self loop
            if (vertexCount < 2 && !allowSelfLoop) return graph;
            // Add negative weight edges in a way that creates many relaxations
            const int negativeEdgeCount = std::max(0, std::min(remainingEdges, static_cast<int>(remainingEdges * negativeRatio)));
            for (int i = 0; i < negativeEdgeCount; ++i) {
                // Backward edge: u starts from vertex 3 when it exists, and v is in [1, u - 1]
                const int u = rangeRand(std::min(vertexCount, 3), vertexCount);
                const int v = rangeRand(1, std::max(1, u - 1));
                WeightType weight = generateWeight<WeightType>(negativeWeightGenerator, u, v);
                graph.addEdge(u, v, weight);
            }
            // Add remaining edges; a rejected draw is retried so that the requested
            // number of edges is actually reached
            const int randomEdgeCount = remainingEdges - negativeEdgeCount;
            for (int added = 0; added < randomEdgeCount;) {
                const int u = mod1(vertexCount), v = mod1(vertexCount);
                if (!allowSelfLoop && u == v) continue;
                WeightType weight = generateWeight<WeightType>(weightGenerator, u, v);
                graph.addEdge(u, v, weight);
                ++added;
            }
            return graph;
        }
    } // namespace graph
    /**
     * @brief Data generator for competitive programming problems
     *
     * Each input file is produced by redirecting stdout to it and invoking the
     * user-supplied generation function; the standard program is then run on that
     * input to produce the matching .ans file.
     */
    struct DataGenerator {
        /**
         * @brief Run data generation process
         * @param dataName Base name for data files
         * @param testCount Number of test cases
         * @param sampleCount Number of sample cases
         * @param stdName Standard program name
         * @param dataFolderName Data folder name (empty means the current directory)
         * @param sampleFolderName Sample folder name (empty means the current directory)
         * @param makeDataFunc Function to generate test data
         * @param makeSampleFunc Function to generate sample data (default does nothing)
         * @param testGroupCountFunc Function that returns number of test groups for each test case
         * @param sampleGroupCountFunc Function that returns number of test groups for each sample
         * @param outputTestCaseId Whether to output test case ID in the first line
         * @throw std::runtime_error if an input file cannot be opened or the standard program fails
         * @note stdout stays redirected to the last generated input file after this returns.
         */
        static void run(
            const std::string &dataName = "data",
            int testCount = 10,
            int sampleCount = 0,
            const std::string &stdName = "std",
            const std::string &dataFolderName = "data",
            const std::string &sampleFolderName = "down",
            std::function<void(int)> makeDataFunc = [](int) {
                // Default implementation: do nothing
            },
            std::function<void(int)> makeSampleFunc = [](int) {
                // Default implementation: do nothing
            },
            std::function<int(int)> testGroupCountFunc = [](int) { return 0; }, std::function<int(int)> sampleGroupCountFunc = [](int) { return 0; }, bool outputTestCaseId = false) {
            const std::string dataPath = "./" + dataFolderName + "/";
            const std::string samplePath = "./" + sampleFolderName + "/";
            // An empty folder name makes the path above resolve to the current
            // directory, so there is nothing to create in that case
            if (!dataFolderName.empty()) mkdir(dataFolderName);
            if (!sampleFolderName.empty()) mkdir(sampleFolderName);
            // Generate test data files
            for (int testCaseId = 1; testCaseId <= testCount; ++testCaseId) {
                const auto taskName = dataName + std::to_string(testCaseId);
                // The lambda is invoked synchronously, so capturing by reference is safe
                generateFile(dataPath + taskName + ".in", [&] { generateMultiTestData(testCaseId, testGroupCountFunc, outputTestCaseId, makeDataFunc); });
                generateAnswer(dataPath + taskName, stdName);
            }
            // Generate sample data files
            for (int testCaseId = 1; testCaseId <= sampleCount; ++testCaseId) {
                const auto taskName = std::to_string(testCaseId);
                generateFile(samplePath + taskName + ".in", [&] { generateMultiTestData(testCaseId, sampleGroupCountFunc, outputTestCaseId, makeSampleFunc); });
                generateAnswer(samplePath + taskName, stdName);
            }
        }

    private:
        /**
         * @brief Generate multi-test data file
         *
         * A group count of 0 means "single test": only the optional test case ID line
         * is written before the data. Otherwise the first line holds the group count
         * (followed by the test case ID when requested) and the data generation
         * function is invoked once per group.
         *
         * @param testCaseId Test case ID
         * @param groupCountFunc Function that returns number of test groups
         * @param outputTestCaseId Whether to output test case ID
         * @param dataGenFunc Data generation function
         */
        static void generateMultiTestData(int testCaseId,
                                          const std::function<int(int)> &groupCountFunc,
                                          bool outputTestCaseId,
                                          const std::function<void(int)> &dataGenFunc) {
            const int groupCount = groupCountFunc(testCaseId);
            if (groupCount == 0) {
                // Single test case
                if (outputTestCaseId) {
                    std::cout << testCaseId << "\n";
                }
                dataGenFunc(testCaseId);
                return;
            }
            // Multiple test cases
            if (outputTestCaseId) {
                std::cout << groupCount << " " << testCaseId << "\n";
            } else {
                std::cout << groupCount << "\n";
            }
            for (int i = 0; i < groupCount; ++i) {
                dataGenFunc(testCaseId);
            }
        }
        /**
         * @brief Generate input file
         * @param fileName File name
         * @param genFunc Generation function; whatever it writes to stdout ends up in the file
         * @throw std::runtime_error if the file cannot be opened for writing
         */
        template <typename Func>
        static void generateFile(const std::string &fileName, Func genFunc) {
            // Without this check a failed redirect would go unnoticed and the standard
            // program would later run on a missing or stale input file
            if (freopen(fileName.c_str(), "wb", stdout) == nullptr) {
                throw std::runtime_error("Failed to open file for writing: " + fileName);
            }
            genFunc();
            // Flush the C++ stream first in case it is not synchronized with C stdio,
            // then the C stream, so the file is complete before the standard program reads it
            std::cout.flush();
            fflush(stdout);
            std::cerr << "Generated: " << fileName << "\n";
        }
        /**
         * @brief Generate answer file using standard program
         * @param filePrefix File prefix without extension
         * @param stdName Standard program name
         * @throw std::runtime_error if std program fails with exit code
         */
        static void generateAnswer(const std::string &filePrefix, const std::string &stdName) {
            // Build the command line (platform dependent)
            std::string cmd;
#ifdef _WIN32
            // Windows command syntax
            cmd = stdName + " < \"" + filePrefix + ".in\" > \"" + filePrefix + ".ans\"";
#else
            // Unix/Linux command syntax
            cmd = "./" + stdName + " < \"" + filePrefix + ".in\" > \"" + filePrefix + ".ans\"";
#endif
            // Run the command and check its return value
            int ret = system(cmd.c_str());
            if (ret != 0) {
                // The value is reported as unsigned (1u * ret)
                std::string errorMsg = "Standard program failed with exit code " + std::to_string(1u * ret);
                errorMsg += " while generating answer for: " + filePrefix;
                throw std::runtime_error(errorMsg);
            }
            std::cerr << "Generated answer: " << filePrefix << ".ans\n";
        }
    };
} // namespace util

快速开始

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
#include "datagenerator.hpp"
using namespace util;

int main() {
    setSeed(time(0));  // 设置随机种子

    // 生成 10 个测试点
    DataGenerator::run("test", 10, 2, "std", "data", "sample",
        [](int testId) {
            int n = rangeRand(1, 100);
            println(n);
            auto arr = genArr(n, 100, mod1<int>);
            printArr(arr);
        }
    );
}

随机数生成

setSeed(seed)

用途:设置随机数种子

参数

  • seed:随机种子,相同种子产生相同随机序列

1
2
setSeed(123);        // 固定种子,用于对拍
setSeed(time(0));    // 时间种子,每次不同

modx(x)

用途:生成 [0, x-1] 的随机整数

参数

  • x:上限(不包含)

1
2
int idx = modx(5);   // 0,1,2,3,4 中的一个
char c = 'a' + modx(26);  // 随机小写字母

mod1(x)

用途:生成 [1, x] 的随机整数

参数

  • x:上限(包含)

1
2
int dice = mod1(6);  // 骰子:1~6
int n = mod1(100);   // 数据规模:1~100

rangeRand(l, r)

用途:生成 [l, r] 的随机整数

参数

  • l:下限
  • r:上限

1
2
int x = rangeRand(-100, 100);  // -100~100
int len = rangeRand(5, 20);    // 字符串长度5~20

randRange(L, R)

用途:生成 [L, R] 内的随机区间 [l, r]

参数

  • L, R:区间范围

返回值:pair<l, r>,保证 l <= r

1
2
auto [l, r] = randRange(1, n);  // 生成查询区间
cout << l << " " << r << endl;

randChar()

用途:生成随机小写字母

返回值:'a' ~ 'z'

1
2
char c = randChar();  // 随机字母
string s = genStr(10, randChar);  // 10个随机字母

数组与字符串

genArr(len, lim, genFunc)

用途:生成指定长度的数组

参数

  • len:数组长度
  • lim:传递给生成函数的参数
  • genFunc:元素生成函数

1
2
3
4
5
6
7
8
9
// 10个[1,100]的随机数
auto arr = genArr(10, 100, [](int x) {
    return rangeRand(1, x);
});

// 5个随机字符
auto chars = genArr(5, 'z', [](char lim) {
    return 'a' + modx(26);
});

genStr(len, genChar)

用途:生成随机字符串

参数

  • len:字符串长度
  • genChar:字符生成函数,默认小写字母

1
2
3
4
5
6
7
string s1 = genStr(10);                    // 10个小写字母
string s2 = genStr(5, []() {               // 5个数字
    return '0' + modx(10);
});
string s3 = genStr(8, []() {               // 8个大写字母
    return 'A' + modx(26);
});

choice(container)

用途:从容器中随机选择一个元素

参数

  • container:容器,例如 vector 或 array

1
2
3
4
5
vector<int> v = {1,2,3,4,5};
int x = choice(v);  // 随机选一个

vector<string> ops = {"add", "del", "query"};
string op = choice(ops);  // 随机操作

shuffleVec(vec)

用途:随机打乱 vector

参数

  • vec:要打乱的 vector

1
2
vector<int> perm = {1,2,3,4,5};
shuffleVec(perm);  // 随机排列

printArr(arr, split, end)

用途:输出数组

参数

  • arr:要输出的数组
  • split:元素分隔符,默认空格
  • end:结束符,默认换行

1
2
3
vector<int> arr = {1,2,3,4,5};
printArr(arr);          // 输出: 1 2 3 4 5
printArr(arr, ", ");    // 输出: 1, 2, 3, 4, 5

println(args...)

用途:输出多个值,空格分隔

参数:可变参数

1
2
println(1, "hello", 3.14);  // 输出: 1 hello 3.14
println(n, m);              // 输出: n m

图论生成

Graph 类基础

1
2
3
4
5
6
7
8
9
using namespace util::graph;

Graph<int> g(5);        // 5个顶点的图
g.addEdge(1, 2, 10);    // 添加边1->2,权重10
g.addEdge(2, 3);        // 添加边2->3,权重1

// 输出格式
g.printWithInfo(true);  // 输出: n m 和所有边(带权重)
g.printTree(true);      // 输出: n 和所有边(带权重)

genTree(n, root, parentLess, weight, ...)

用途:生成随机树

参数

  • n:顶点数
  • root:根节点,默认1
  • parentLess:是否要求父节点编号小于子节点
  • weight:权重生成器,可以是一个 pair 表示范围,可以是一个固定的数,也可以是一个函数

1
2
3
4
auto tree1 = genTree(10);                   // 随机树
auto tree2 = genTree(10, 1, true, {1, 100}); // 父节点小于子节点,权重1~100

tree1.printTree(true);  // 输出树

genChain(n, weight, ...)

用途:生成链

参数

  • n:顶点数
  • weight:权重生成器,可以是一个 pair 表示范围,可以是一个固定的数,也可以是一个函数

1
2
auto chain = genChain(10, {1,50});  // 10个顶点的链,权重1~50
chain.printWithInfo(true);

genStar(n, center, weight, ...)

用途:生成菊花图(星图)

参数

  • n:顶点数
  • center:中心节点,0 表示随机
  • weight:权重生成器,可以是一个 pair 表示范围,可以是一个固定的数,也可以是一个函数

1
2
3
auto star1 = genStar(10);           // 随机中心
auto star2 = genStar(10, 5);        // 中心为5
star1.printWithInfo(true);

genCycle(n, weight, ...)

用途:生成环

参数

  • n:顶点数
  • weight:权重生成器,可以是一个 pair 表示范围,可以是一个固定的数,也可以是一个函数

1
2
auto cycle = genCycle(10, {1, 100});  // 10个顶点的环
cycle.printWithInfo(true);

genCompleteGraph(n, weight, ...)

用途:生成完全图

参数

  • n:顶点数
  • weight:权重生成器,可以是一个 pair 表示范围,可以是一个固定的数,也可以是一个函数

1
2
auto complete = genCompleteGraph(5);  // 5个顶点的完全图
complete.printWithInfo(true);

genBipartiteGraph(left, right, m, weight, ...)

用途:生成二分图

参数

  • left:左部点数
  • right:右部点数
  • m:边数
  • weight:权重生成器,可以是一个 pair 表示范围,可以是一个固定的数,也可以是一个函数

1
2
3
// 5左5右10条边的二分图
auto bipartite = genBipartiteGraph(5, 5, 10, {1, 100});
bipartite.printWithInfo(true);

genUndirectedGraph(n, m, weight, multi, selfLoop, connected)

用途:生成无向图

参数

  • n:顶点数
  • m:边数
  • weight:权重生成器,可以是一个 pair 表示范围,可以是一个固定的数,也可以是一个函数
  • multi:是否允许多重边
  • selfLoop:是否允许自环
  • connected:是否连通

1
2
3
// 10顶点15边的连通无向图
auto graph = genUndirectedGraph(10, 15, {1, 100}, false, false, true);
graph.printWithInfo(true);

genDirectedGraph(n, m, weight, ...)

用途:生成有向图

参数:同无向图

1
2
auto digraph = genDirectedGraph(10, 20, {1, 50});  // 10顶点20边有向图
digraph.printWithInfo(true);

genDAG(n, m, weight, ...)

用途:生成有向无环图

参数:同有向图

1
2
auto dag = genDAG(10, 15, {1, 100});  // 10顶点15边DAG
dag.printWithInfo(true);

genSpfaKiller(n, m, posWeight, negWeight, negRatio, ...)

用途:生成卡 SPFA 的图

参数

  • n, m:顶点和边数
  • posWeight:正权边权重
  • negWeight:负权边权重
  • negRatio:负权边比例

1
2
3
// 100顶点200边,20%负权边
auto killer = genSpfaKiller(100, 200, {1, 10}, []{ return -1; }, 0.2);
killer.printWithInfo(true);

printParentArray(parent, root)

用途:输出父节点数组

参数

  • parent:父节点数组,parent[i] 是节点 i+1 的父节点
  • root:根节点,默认1

1
2
vector<int> parent = {0, 1, 1, 2, 2};  // 节点2~5的父节点
Graph<>::printParentArray(parent);  // 输出: 1 1 2 2

数据生成框架

DataGenerator::run(...)

用途:运行完整数据生成流程

参数

  • dataName:数据文件名前缀
  • testCount:测试数据数量
  • sampleCount:样例数据数量
  • stdName:标程名称
  • dataFolder:测试数据目录
  • sampleFolder:样例数据目录
  • makeDataFunc:测试数据生成函数,默认啥也不做
  • makeSampleFunc:样例数据生成函数,默认啥也不做
  • testGroupFunc:测试数据组数函数,传入测试点编号,需要返回一个数表示多测组数(0 表示不多测),默认返回 0
  • sampleGroupFunc:样例数据组数函数,传入测试点编号,需要返回一个数表示多测组数(0 表示不多测),默认返回 0
  • outputTestId:是否输出测试点编号

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
DataGenerator::run(
    "data", 10, 2, "std", "data", "sample",
    [](int testId) {
        int n = rangeRand(1, 100);
        println(n);
        // ... 生成数据
    },
    [](int testId) {
        // 样例数据,通常较小
        println(5);
        println(1, 2, 3, 4, 5);
    },
    [](int testId) { return 0; },  // 单组测试
    [](int testId) { return 0; },  // 单组样例
    false
);

综合实例

实例1:生成树相关问题数据

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
void generateTreeProblem(int testId) {
    using namespace util::graph;

    // 根据测试点调整数据规模
    int n;
    if (testId <= 3) n = rangeRand(5, 10);
    else if (testId <= 6) n = rangeRand(50, 100);
    else n = rangeRand(500, 1000);

    // 生成随机树
    auto tree = genTree(n, 1, false, {1, 100});
    tree.printTree(true);  // 输出树

    // 生成查询
    int q = rangeRand(1, 10);
    println(q);
    for (int i = 0; i < q; i++) {
        int u = mod1(n), v = mod1(n);
        println(u, v);
    }
}

int main() {
    DataGenerator::run("tree", 10, 2, "sol_tree", "data", "sample", generateTreeProblem);
    return 0;
}

实例2:生成图论最短路径数据

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
// Generates one shortest-path test: a connected undirected weighted graph
// followed by a line with the source and target vertices.
void generateShortestPath(int testId) {
    using namespace util::graph;

    // A connected graph without multi-edges or self loops must satisfy
    // n - 1 <= m <= n * (n - 1) / 2, so m is drawn relative to n.
    int n, m;
    if (testId <= 3) {
        n = rangeRand(5, 10);
        m = rangeRand(n - 1, std::min(20, n * (n - 1) / 2));
    } else {
        n = rangeRand(100, 500);
        m = rangeRand(std::max(200, n - 1), 1000);
    }

    // Connected undirected graph, no multi-edges, no self loops
    auto graph = genUndirectedGraph(n, m, {1, 1000}, false, false, true);
    graph.printWithInfo(true);

    // Source and target vertices
    int s = mod1(n), t = mod1(n);
    println(s, t);
}

int main() {
    DataGenerator::run("graph", 10, 2, "sol_graph", "data", "sample", generateShortestPath);
    return 0;
}

实例3:生成数组查询问题数据

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
void generateArrayQuery(int testId) {
    int n, q;
    if (testId <= 3) {
        n = rangeRand(5, 10);
        q = rangeRand(3, 5);
    } else if (testId <= 6) {
        n = rangeRand(100, 500);
        q = rangeRand(10, 20);
    } else {
        n = rangeRand(1000, 10000);
        q = rangeRand(50, 100);
    }

    // 生成数组
    println(n, q);
    auto arr = genArr(n, 1000000, [](int lim) {
        return rangeRand(1, lim);
    });
    printArr(arr);

    // 生成查询
    for (int i = 0; i < q; i++) {
        int op = mod1(2);  // 操作类型
        if (op == 1) {
            // 修改操作
            int pos = mod1(n);
            int val = rangeRand(1, 1000000);
            println(1, pos, val);
        } else {
            // 查询操作
            auto [l, r] = randRange(1, n);
            println(2, l, r);
        }
    }
}

int main() {
    DataGenerator::run("array", 10, 2, "sol_array", "data", "sample", generateArrayQuery);
    return 0;
}

实例4:生成多组测试数据

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
void generateSingleTestCase(int testId) {
    int n = rangeRand(1, 100);
    println(n);
    auto arr = genArr(n, 100, mod1<int>);
    printArr(arr);
}

int main() {
    DataGenerator::run(
        "multi", 10, 2, "std", "data", "sample",
        generateSingleTestCase,
        generateSingleTestCase,
        [](int testId) {
            // 动态决定每组数据的测试用例数量
            if (testId <= 5) return rangeRand(1, 3);
            else return rangeRand(5, 10);
        },
        [](int testId) { return 1; },  // 样例只有1组
        true  // 输出测试ID
    );
    return 0;
}

实例5:特殊构造数据

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
// Generates hand-picked special structures for the first test cases and
// random graphs for the rest.
void generateSpecialCases(int testId) {
    using namespace util::graph;

    // Each case body has its own scope: jumping to a later case label past an
    // initialized declaration is ill-formed in C++.
    switch (testId) {
        case 1: {
            // Minimal data
            println(1);
            println(1);
            break;
        }

        case 2: {
            // Chain
            auto chain = genChain(1000, {1, 100});
            chain.printWithInfo(true);
            break;
        }

        case 3: {
            // Star graph
            auto star = genStar(500, 1, {1, 1000});
            star.printWithInfo(true);
            break;
        }

        case 4: {
            // Complete graph
            auto complete = genCompleteGraph(100, {1, 100});
            complete.printWithInfo(true);
            break;
        }

        default: {
            // Ordinary random data
            int n = rangeRand(100, 500);
            int m = rangeRand(n - 1, std::min(5000, n * (n - 1) / 2));
            auto graph = genUndirectedGraph(n, m, {1, 1000});
            graph.printWithInfo(true);
            break;
        }
    }
}

int main() {
    DataGenerator::run("special", 10, 0, "std", "data", "", generateSpecialCases);
    return 0;
}