1
/**
2
 * Try to detect typos in identifiers.
3
 *
4
 * Copyright: Copyright (C) 1999-2020 by The D Language Foundation, All Rights Reserved
5
 * Authors:   Walter Bright, http://www.digitalmars.com
6
 * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
7
 * Source:    $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/root/speller.d, root/_speller.d)
8
 * Documentation:  https://dlang.org/phobos/dmd_root_speller.html
9
 * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/root/speller.d
10
 */
11

12
module dmd.root.speller;
13

14
import core.stdc.stdlib;
15
import core.stdc.string;
16

17
immutable string idchars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_";
18

19
/**************************************************
20
 * combine a new result from the spell checker to
21
 * find the one with the closest symbol with
22
 * respect to the cost defined by the search function
23
 * Input/Output:
24
 *      p       best found spelling (NULL if none found yet)
25
 *      cost    cost of p (int.max if none found yet)
26
 * Input:
27
 *      np      new found spelling (NULL if none found)
28
 *      ncost   cost of np if non-NULL
29
 * Returns:
30
 *      true    if the cost is less or equal 0
31
 *      false   otherwise
32
 */
33
private bool combineSpellerResult(T)(ref T p, ref int cost, T np, int ncost)
34
{
35 1
    if (np && ncost < cost)
36
    {
37 1
        p = np;
38 1
        cost = ncost;
39 1
        if (cost <= 0)
40 1
            return true;
41
    }
42 1
    return false;
43
}
44

45
private auto spellerY(alias dg)(const(char)[] seed, size_t index, ref int cost)
46
{
47 1
    if (!seed.length)
48 0
        return null;
49 1
    char[30] tmp;
50 1
    char[] buf;
51 1
    if (seed.length <= tmp.sizeof - 1)
52 1
        buf = tmp;
53
    else
54
    {
55 0
        buf = (cast(char*)alloca(seed.length + 1))[0 .. seed.length + 1]; // leave space for extra char
56 0
        if (!buf.ptr)
57 0
            return null; // no matches
58
    }
59 1
    buf[0 .. index] = seed[0 .. index];
60 1
    cost = int.max;
61 1
    searchFunctionType!dg p = null;
62 1
    int ncost;
63
    /* Delete at seed[index] */
64 1
    if (index < seed.length)
65
    {
66 1
        buf[index .. seed.length - 1] = seed[index + 1 .. $];
67 1
        auto np = dg(buf[0 .. seed.length - 1], ncost);
68 1
        if (combineSpellerResult(p, cost, np, ncost))
69 1
            return p;
70
    }
71
    /* Substitutions */
72 1
    if (index < seed.length)
73
    {
74 1
        buf[0 .. seed.length] = seed;
75 1
        foreach (s; idchars)
76
        {
77 1
            buf[index] = s;
78
            //printf("sub buf = '%s'\n", buf);
79 1
            auto np = dg(buf[0 .. seed.length], ncost);
80 1
            if (combineSpellerResult(p, cost, np, ncost))
81 1
                return p;
82
        }
83
    }
84
    /* Insertions */
85 1
    buf[index + 1 .. seed.length + 1] = seed[index .. $];
86 1
    foreach (s; idchars)
87
    {
88 1
        buf[index] = s;
89
        //printf("ins buf = '%s'\n", buf);
90 1
        auto np = dg(buf[0 .. seed.length + 1], ncost);
91 1
        if (combineSpellerResult(p, cost, np, ncost))
92 1
            return p;
93
    }
94 1
    return p; // return "best" result
95
}
96

97
private auto spellerX(alias dg)(const(char)[] seed, bool flag)
98
{
99 1
    if (!seed.length)
100 0
        return null;
101 1
    char[30] tmp;
102 1
    char[] buf;
103 1
    if (seed.length <= tmp.sizeof - 1)
104 1
        buf = tmp;
105
    else
106
    {
107 1
        buf = (cast(char*)alloca(seed.length + 1))[0 .. seed.length + 1]; // leave space for extra char
108
    }
109 1
    int cost = int.max, ncost;
110 1
    searchFunctionType!dg p = null, np;
111
    /* Deletions */
112 1
    buf[0 .. seed.length - 1] = seed[1 .. $];
113 1
    for (size_t i = 0; i < seed.length; i++)
114
    {
115
        //printf("del buf = '%s'\n", buf);
116 1
        if (flag)
117 1
            np = spellerY!dg(buf[0 .. seed.length - 1], i, ncost);
118
        else
119 1
            np = dg(buf[0 .. seed.length - 1], ncost);
120 1
        if (combineSpellerResult(p, cost, np, ncost))
121 1
            return p;
122 1
        buf[i] = seed[i];
123
    }
124
    /* Transpositions */
125 1
    if (!flag)
126
    {
127 1
        buf[0 .. seed.length] = seed;
128 1
        for (size_t i = 0; i + 1 < seed.length; i++)
129
        {
130
            // swap [i] and [i + 1]
131 1
            buf[i] = seed[i + 1];
132 1
            buf[i + 1] = seed[i];
133
            //printf("tra buf = '%s'\n", buf);
134 1
            if (combineSpellerResult(p, cost, dg(buf[0 .. seed.length], ncost), ncost))
135 1
                return p;
136 1
            buf[i] = seed[i];
137
        }
138
    }
139
    /* Substitutions */
140 1
    buf[0 .. seed.length] = seed;
141 1
    for (size_t i = 0; i < seed.length; i++)
142
    {
143 1
        foreach (s; idchars)
144
        {
145 1
            buf[i] = s;
146
            //printf("sub buf = '%s'\n", buf);
147 1
            if (flag)
148 1
                np = spellerY!dg(buf[0 .. seed.length], i + 1, ncost);
149
            else
150 1
                np = dg(buf[0 .. seed.length], ncost);
151 1
            if (combineSpellerResult(p, cost, np, ncost))
152 1
                return p;
153
        }
154 1
        buf[i] = seed[i];
155
    }
156
    /* Insertions */
157 1
    buf[1 .. seed.length + 1] = seed;
158 1
    for (size_t i = 0; i <= seed.length; i++) // yes, do seed.length+1 iterations
159
    {
160 1
        foreach (s; idchars)
161
        {
162 1
            buf[i] = s;
163
            //printf("ins buf = '%s'\n", buf);
164 1
            if (flag)
165 1
                np = spellerY!dg(buf[0 .. seed.length + 1], i + 1, ncost);
166
            else
167 1
                np = dg(buf[0 .. seed.length + 1], ncost);
168 1
            if (combineSpellerResult(p, cost, np, ncost))
169 1
                return p;
170
        }
171 1
        if (i < seed.length)
172 1
            buf[i] = seed[i];
173
    }
174 1
    return p; // return "best" result
175
}
176

177
/**************************************************
178
 * Looks for correct spelling.
179
 * Currently only looks a 'distance' of one from the seed[].
180
 * This does an exhaustive search, so can potentially be very slow.
181
 * Params:
182
 *      seed = wrongly spelled word
183
 *      dg = search delegate
184
 * Returns:
185
 *      null = no correct spellings found, otherwise
186
 *      the value returned by dg() for first possible correct spelling
187
 */
188
auto speller(alias dg)(const(char)[] seed)
189
if (isSearchFunction!dg)
190
{
191 1
    size_t maxdist = seed.length < 4 ? seed.length / 2 : 2;
192 1
    for (int distance = 0; distance < maxdist; distance++)
193
    {
194 1
        auto p = spellerX!dg(seed, distance > 0);
195 1
        if (p)
196 1
            return p;
197
        //      if (seedlen > 10)
198
        //          break;
199
    }
200 1
    return null; // didn't find it
201
}
202

203
enum isSearchFunction(alias fun) = is(searchFunctionType!fun);
204
alias searchFunctionType(alias fun) = typeof(() {int x; return fun("", x);}());
205

206
unittest
207
{
208
    static immutable string[][] cases =
209
    [
210
        ["hello", "hell", "y"],
211
        ["hello", "hel", "y"],
212
        ["hello", "ello", "y"],
213
        ["hello", "llo", "y"],
214
        ["hello", "hellox", "y"],
215
        ["hello", "helloxy", "y"],
216
        ["hello", "xhello", "y"],
217
        ["hello", "xyhello", "y"],
218
        ["hello", "ehllo", "y"],
219
        ["hello", "helol", "y"],
220
        ["hello", "abcd", "n"],
221
        ["hello", "helxxlo", "y"],
222
        ["hello", "ehlxxlo", "n"],
223
        ["hello", "heaao", "y"],
224
        ["_123456789_123456789_123456789_123456789", "_123456789_123456789_123456789_12345678", "y"],
225
    ];
226
    //printf("unittest_speller()\n");
227

228 1
    string dgarg;
229

230
    string speller_test(const(char)[] s, ref int cost)
231
    {
232 1
        assert(s[$-1] != '\0');
233
        //printf("speller_test(%s, %s)\n", dgarg, s);
234 1
        cost = 0;
235 1
        if (dgarg == s)
236 1
            return dgarg;
237 1
        return null;
238
    }
239

240 1
    dgarg = "hell";
241 1
    auto p = speller!speller_test("hello");
242 1
    assert(p !is null);
243 1
    foreach (testCase; cases)
244
    {
245
        //printf("case [%d]\n", i);
246 1
        dgarg = testCase[1];
247 1
        auto p2 = speller!speller_test(testCase[0]);
248 1
        if (p2)
249 1
            assert(testCase[2][0] == 'y');
250
        else
251 1
            assert(testCase[2][0] == 'n');
252
    }
253
    //printf("unittest_speller() success\n");
254
}

Read our documentation on viewing source code .

Loading