wooorm / franc

@@ -1,4 +1,8 @@
Loading
1 1
#!/usr/bin/env node
2 +
/**
3 +
 * @typedef {import('franc').Options} Options
4 +
 */
5 +
2 6
import {createRequire} from 'node:module'
3 7
import meow from 'meow'
4 8
import {franc, francAll} from 'franc'
@@ -49,30 +53,30 @@
Loading
49 53
const value = cli.input.join(' ').trim()
50 54
const flags = cli.flags
51 55
52 -
flags.minLength = Number(flags.minLength) || null
53 -
54 -
flags.whitelist = list(flags.whitelist)
55 -
flags.blacklist = list(flags.blacklist)
56 -
flags.only = flags.whitelist.concat(list(flags.only))
57 -
flags.ignore = flags.blacklist.concat(list(flags.ignore))
56 +
/** @type {Options} */
57 +
const options = {
58 +
  minLength: Number(flags.minLength) || undefined,
59 +
  // @ts-expect-error: legacy.
60 +
  whitelist: list(flags.whitelist),
61 +
  blacklist: list(flags.blacklist),
62 +
  only: list(flags.only),
63 +
  ignore: list(flags.ignore)
64 +
}
58 65
59 66
if (cli.input.length === 0) {
60 67
  process.stdin.resume()
61 68
  process.stdin.setEncoding('utf8')
62 69
  process.stdin.on('data', (data) => {
63 -
    detect(data.trim())
70 +
    detect(String(data).trim())
64 71
  })
65 72
} else {
66 73
  detect(value)
67 74
}
68 75
76 +
/**
77 +
 * @param {string} value
78 +
 */
69 79
function detect(value) {
70 -
  const options = {
71 -
    minLength: flags.minLength,
72 -
    only: flags.only,
73 -
    ignore: flags.ignore
74 -
  }
75 -
76 80
  if (flags.all) {
77 81
    const results = francAll(value, options)
78 82
    let index = -1
@@ -117,6 +121,10 @@
Loading
117 121
  ].join('\n')
118 122
}
119 123
124 +
/**
125 +
 * @param {string|undefined} value
126 +
 * @returns {string[]}
127 +
 */
120 128
function list(value) {
121 129
  return value ? String(value).split(',') : []
122 130
}

@@ -1,3 +1,15 @@
Loading
1 +
/**
2 +
 * @typedef {import('trigram-utils').TrigramTuple} TrigramTuple
3 +
 *
4 +
 * @typedef Options
5 +
 * @property {string[]} [only]
6 +
 *   allow languages
7 +
 * @property {string[]} [ignore]
8 +
 *   disallow languages
9 +
 * @property {number} [minLength=10]
10 +
 *   minimum length to accept
11 +
 */
12 +
1 13
/* Load `trigram-utils`. */
2 14
import {asTuples} from 'trigram-utils'
3 15
@@ -22,16 +34,25 @@
Loading
22 34
const own = {}.hasOwnProperty
23 35
24 36
/* Construct trigram dictionaries. */
37 +
38 +
/** @type {string} */
25 39
let script
26 40
41 +
/** @type {Record<string, Record<string, Record<string, number>>>} */
42 +
const numericData = {}
43 +
27 44
for (script in data) {
28 45
  if (own.call(data, script)) {
29 46
    const languages = data[script]
47 +
    /** @type {string} */
30 48
    let name
31 49
50 +
    numericData[script] = {}
51 +
32 52
    for (name in languages) {
33 53
      if (own.call(languages, name)) {
34 54
        const model = languages[name].split('|')
55 +
        /** @type {Record<string, number>} */
35 56
        const trigrams = {}
36 57
        let weight = model.length
37 58
@@ -39,7 +60,7 @@
Loading
39 60
          trigrams[model[weight]] = weight
40 61
        }
41 62
42 -
        languages[name] = trigrams
63 +
        numericData[script][name] = trigrams
43 64
      }
44 65
    }
45 66
  }
@@ -48,9 +69,12 @@
Loading
48 69
/**
49 70
 * Get the most probable language for the given value.
50 71
 *
51 -
 * @param {string} value - The value to test.
52 -
 * @param {Object} options - Configuration.
53 -
 * @return {string} The most probable language.
72 +
 * @param {string} [value]
73 +
 *   The value to test.
74 +
 * @param {Options} [options]
75 +
 *   Configuration.
76 +
 * @return {string}
77 +
 *  The most probable language.
54 78
 */
55 79
export function franc(value, options) {
56 80
  return francAll(value, options)[0][0]
@@ -60,13 +84,19 @@
Loading
60 84
 * Get a list of probable languages the given value is
61 85
 * written in.
62 86
 *
63 -
 * @param {string} value - The value to test.
64 -
 * @param {Object} options - Configuration.
65 -
 * @return {Array.<Array.<string, number>>} An array
66 -
 *   containing language--distance tuples.
87 +
 * @param {string} [value]
88 +
 *   The value to test.
89 +
 * @param {Options} [options]
90 +
 *   Configuration.
91 +
 * @return {TrigramTuple[]}
92 +
 *   An array containing language—distance tuples.
67 93
 */
68 94
export function francAll(value, options = {}) {
95 +
  /** @type {string[]} */
96 +
  // @ts-expect-error: `whitelist` is from long ago.
69 97
  const only = [...(options.whitelist || []), ...(options.only || [])]
98 +
  /** @type {string[]} */
99 +
  // @ts-expect-error: `blacklist` is from long ago.
70 100
  const ignore = [...(options.blacklist || []), ...(options.ignore || [])]
71 101
  const minLength =
72 102
    options.minLength !== null && options.minLength !== undefined
@@ -84,10 +114,10 @@
Loading
84 114
  const script = getTopScript(value, expressions)
85 115
86 116
  /* One language exists for the most-used script. */
87 -
  if (!(script[0] in data)) {
117 +
  if (!script[0] || !(script[0] in numericData)) {
88 118
    /* If no matches occurred, such as a digit-only string,
89 119
     * or because the language is ignored, exit with `und`. */
90 -
    if (script[1] === 0 || !allow(script[0], only, ignore)) {
120 +
    if (!script[0] || script[1] === 0 || !allow(script[0], only, ignore)) {
91 121
      return und()
92 122
    }
93 123
@@ -98,7 +128,7 @@
Loading
98 128
   * normalize the distance values. */
99 129
  return normalize(
100 130
    value,
101 -
    getDistances(asTuples(value), data[script[0]], only, ignore)
131 +
    getDistances(asTuples(value), numericData[script[0]], only, ignore)
102 132
  )
103 133
}
104 134
@@ -106,11 +136,12 @@
Loading
106 136
 * Normalize the difference for each tuple in
107 137
 * `distances`.
108 138
 *
109 -
 * @param {string} value - Value to normalize.
110 -
 * @param {Array.<Array.<string, number>>} distances
111 -
 *   - List of distances.
112 -
 * @return {Array.<Array.<string, number>>} - Normalized
113 -
 *   distances.
139 +
 * @param {string} value
140 +
 *   Value to normalize.
141 +
 * @param {TrigramTuple[]} distances
142 +
 *   List of distances.
143 +
 * @return {TrigramTuple[]}
144 +
 *   Normalized distances.
114 145
 */
115 146
function normalize(value, distances) {
116 147
  const min = distances[0][1]
@@ -128,14 +159,18 @@
Loading
128 159
 * From `scripts`, get the most occurring expression for
129 160
 * `value`.
130 161
 *
131 -
 * @param {string} value - Value to check.
132 -
 * @param {Object.<RegExp>} scripts - Top-Scripts.
133 -
 * @return {Array} Top script and its
134 -
 *   occurrence percentage.
162 +
 * @param {string} value
163 +
 *   Value to check.
164 +
 * @param {Record<string, RegExp>} scripts
165 +
 *   Top-Scripts.
166 +
 * @return {[string|undefined, number]}
167 +
 *   Top script and its occurrence percentage.
135 168
 */
136 169
function getTopScript(value, scripts) {
137 170
  let topCount = -1
171 +
  /** @type {string|undefined} */
138 172
  let topScript
173 +
  /** @type {string} */
139 174
  let script
140 175
141 176
  for (script in scripts) {
@@ -155,9 +190,12 @@
Loading
155 190
/**
156 191
 * Get the occurrence ratio of `expression` for `value`.
157 192
 *
158 -
 * @param {string} value - Value to check.
159 -
 * @param {RegExp} expression - Code-point expression.
160 -
 * @return {number} Float between 0 and 1.
193 +
 * @param {string} value
194 +
 *   Value to check.
195 +
 * @param {RegExp} expression
196 +
 *   Code-point expression.
197 +
 * @return {number}
198 +
 *   Float between 0 and 1.
161 199
 */
162 200
function getOccurrence(value, expression) {
163 201
  const count = value.match(expression)
@@ -166,29 +204,33 @@
Loading
166 204
}
167 205
168 206
/**
169 -
 * Get the distance between an array of trigram--count
207 +
 * Get the distance between an array of trigram—count
170 208
 * tuples, and multiple trigram dictionaries.
171 209
 *
172 -
 * @param {Array.<Array.<string, number>>} trigrams - An
173 -
 *   array containing trigram--count tuples.
174 -
 * @param {Object.<Object>} languages - multiple
175 -
 *   trigrams to test against.
176 -
 * @param {Array.<string>} only - Allowed languages; if
177 -
 *   non-empty, only included languages are kept.
178 -
 * @param {Array.<string>} ignore - Disallowed languages;
179 -
 *   included languages are ignored.
180 -
 * @return {Array.<Array.<string, number>>} An array
181 -
 *   containing language--distance tuples.
210 +
 * @param {TrigramTuple[]} trigrams
211 +
 *   An array containing trigram—count tuples.
212 +
 * @param {Record<string, Record<string, number>>} languages
213 +
 *   Multiple trigrams to test against.
214 +
 * @param {string[]} only
215 +
 *   Allowed languages; if non-empty, only included languages are kept.
216 +
 * @param {string[]} ignore
217 +
 *   Disallowed languages; included languages are ignored.
218 +
 * @return {TrigramTuple[]} An array
219 +
 *   containing language—distance tuples.
182 220
 */
183 221
function getDistances(trigrams, languages, only, ignore) {
184 222
  languages = filterLanguages(languages, only, ignore)
185 223
224 +
  /** @type {TrigramTuple[]} */
186 225
  const distances = []
226 +
  /** @type {string} */
187 227
  let language
188 228
189 -
  for (language in languages) {
190 -
    if (own.call(languages, language)) {
191 -
      distances.push([language, getDistance(trigrams, languages[language])])
229 +
  if (languages) {
230 +
    for (language in languages) {
231 +
      if (own.call(languages, language)) {
232 +
        distances.push([language, getDistance(trigrams, languages[language])])
233 +
      }
192 234
    }
193 235
  }
194 236
@@ -196,14 +238,15 @@
Loading
196 238
}
197 239
198 240
/**
199 -
 * Get the distance between an array of trigram--count
241 +
 * Get the distance between an array of trigram—count
200 242
 * tuples, and a language dictionary.
201 243
 *
202 -
 * @param {Array.<Array.<string, number>>} trigrams - An
203 -
 *   array containing trigram--count tuples.
204 -
 * @param {Object.<number>} model - Object
205 -
 *   containing weighted trigrams.
206 -
 * @return {number} - The distance between the two.
244 +
 * @param {TrigramTuple[]} trigrams
245 +
 *   An array containing trigram—count tuples.
246 +
 * @param {Record<string, number>} model
247 +
 *   Object containing weighted trigrams.
248 +
 * @return {number}
249 +
 *   The distance between the two.
207 250
 */
208 251
function getDistance(trigrams, model) {
209 252
  let distance = 0
@@ -231,21 +274,23 @@
Loading
231 274
 * Filter `languages` by removing languages in
232 275
 * `ignore`, or including languages in `only`.
233 276
 *
234 -
 * @param {Object.<Object>} languages - Languages
235 -
 *   to filter
236 -
 * @param {Array.<string>} only - Allowed languages; if
237 -
 *   non-empty, only included languages are kept.
238 -
 * @param {Array.<string>} ignore - Disallowed languages;
239 -
 *   included languages are ignored.
240 -
 * @return {Object.<Object>} - Filtered array of
241 -
 *   languages.
277 +
 * @param {Record<string, Record<string, number>>} languages
278 +
 *   Languages to filter
279 +
 * @param {string[]} only
280 +
 *   Allowed languages; if non-empty, only included languages are kept.
281 +
 * @param {string[]} ignore
282 +
 *   Disallowed languages; included languages are ignored.
283 +
 * @return {Record<string, Record<string, number>>}
284 +
 *   Filtered languages.
242 285
 */
243 286
function filterLanguages(languages, only, ignore) {
244 287
  if (only.length === 0 && ignore.length === 0) {
245 288
    return languages
246 289
  }
247 290
291 +
  /** @type {Record<string, Record<string, number>>} */
248 292
  const filteredLanguages = {}
293 +
  /** @type {string} */
249 294
  let language
250 295
251 296
  for (language in languages) {
@@ -260,13 +305,14 @@
Loading
260 305
/**
261 306
 * Check if `language` can match according to settings.
262 307
 *
263 -
 * @param {string} language - Languages
264 -
 *   to filter
265 -
 * @param {Array.<string>} only - Allowed languages; if
266 -
 *   non-empty, only included languages are kept.
267 -
 * @param {Array.<string>} ignore - Disallowed languages;
268 -
 *   included languages are ignored.
269 -
 * @return {boolean} - Whether `language` can match
308 +
 * @param {string} language
309 +
 *   Language to check.
310 +
 * @param {string[]} only
311 +
 *   Allowed languages; if non-empty, only included languages are kept.
312 +
 * @param {string[]} ignore
313 +
 *   Disallowed languages; included languages are ignored.
314 +
 * @return {boolean}
315 +
 *   Whether `language` can match
270 316
 */
271 317
function allow(language, only, ignore) {
272 318
  if (only.length === 0 && ignore.length === 0) {
@@ -278,18 +324,29 @@
Loading
278 324
  )
279 325
}
280 326
281 -
/* Create a single `und` tuple. */
327 +
/**
328 +
 * Create a single `und` tuple.
329 +
 */
282 330
function und() {
283 331
  return singleLanguageTuples('und')
284 332
}
285 333
286 -
/* Create a single tuple as a list of tuples from a given
287 -
 * language code. */
334 +
/**
335 +
 * Create a single tuple as a list of tuples from a given language code.
336 +
 *
337 +
 * @param {string} language
338 +
 * @returns {TrigramTuple[]}
339 +
 */
288 340
function singleLanguageTuples(language) {
289 341
  return [[language, 1]]
290 342
}
291 343
292 -
/* Deep regular sort on the number at `1` in both objects. */
344 +
/**
345 +
 * Deep regular sort on the number at `1` in both objects.
346 +
 *
347 +
 * @param {TrigramTuple} a
348 +
 * @param {TrigramTuple} b
349 +
 */
293 350
function sort(a, b) {
294 351
  return a[1] - b[1]
295 352
}

@@ -1,3 +1,4 @@
Loading
1 +
/** @type {Record<string, Record<string, string>>} */
1 2
export const data = {
2 3
  Latin: {
3 4
    spa: ' de|de |os | la| a |la | y |ón |ión|es |ere|rec|ien|o a|der|ció|a p|cho|ech|en |ent|a l|aci|e d|el |ona|na | co|as |al |da | to|ene|e l| en| el| pe|nte|tod|ho | su|per|ad | ti|a t|ers|tie| se|rso| pr|son|e s|te |oda|cia|n d|o d|dad|ida| in|ne | es|ion|cio|s d|con|est|a e| po|men| li|res|nci|su |to |tra| re|n e| lo|tad| na|los|a s| o |ia |que| pa|rá |pro| un|s y|ual|s e|lib|nac|do |ra |er |nal|ue | qu|e e|a d|ar |nes|ica|a c|sta|ser|or |ter|se |por|cci|io |des|ado|les|one|a a|del|l d|ndi| so| cu|s p|ale|s n|ame|par|ici|oci|una|ber|s t|rta|com| di|e a|imi|o s|e c|ert|o e|dos|las|o p|ant|dic|nto| al|ara|ibe|enc|cas| as|e p|ten|ali|o t|soc|y l|n c|s l|l t|pre|nta|so |tos|y a|ria|n t|die|a u| fu|no |l p|ial|qui|dis|s o|hos|gua|igu| ig| ca|sar| ma|l e| ac|tiv|s a|re |nad|vid|era| tr|ier|cua|n p|cla|ade|bre|s s|esa|ntr|ecc|a i| le|lid|das|d d|ido|ari|ind|ada|nda|fun|mie|ca |tic|eli|ta |y d|nid|e i|n l|ios|o y|esp|iva|y e|mat|bli|r a|drá|tri|cti|tal|rim|ont|erá|us |sus|end|pen|tor|ito|ond|ori|uie|lig|n a|ist|rac|lar|rse|tar|mo |omo|ibr|odo|edi|med| me|nio|a y|eda|isf|lo |aso|l m|ias|ico|lic|ple|ste|act|tec|ote|rot|ele|ura| ni|ie |adi|u p|seg|s i|un |und|a n|lqu|alq|o i|inc|sti| si|n s|ern',

@@ -1,4 +1,5 @@
Loading
1 1
// This file is generated by `build.js`.
2 +
/** @type {Record<string, RegExp>} */
2 3
export const expressions = {
3 4
  cmn: /[\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u3005\u3007\u3021-\u3029\u3038-\u303B\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFA6D\uFA70-\uFAD9]|\uD81B[\uDFE2\uDFE3\uDFF0\uDFF1]|[\uD840-\uD868\uD86A-\uD86C\uD86F-\uD872\uD874-\uD879\uD880-\uD883][\uDC00-\uDFFF]|\uD869[\uDC00-\uDEDF\uDF00-\uDFFF]|\uD86D[\uDC00-\uDF38\uDF40-\uDFFF]|\uD86E[\uDC00-\uDC1D\uDC20-\uDFFF]|\uD873[\uDC00-\uDEA1\uDEB0-\uDFFF]|\uD87A[\uDC00-\uDFE0]|\uD87E[\uDC00-\uDE1D]|\uD884[\uDC00-\uDF4A]/g,
4 5
  Latin:
Files Coverage
packages 100.00%
Project Totals (4 files) 100.00%
Sunburst
The innermost circle is the entire project; moving away from the center are folders and then, finally, individual files. The size and color of each slice represent the number of statements and the coverage, respectively.
Icicle
The top section represents the entire project, followed by folders and finally individual files. The size and color of each slice represent the number of statements and the coverage, respectively.
Grid
Each block represents a single file in the project. The size and color of each block represent the number of statements and the coverage, respectively.
Loading