artdaq_node_server  v1_00_07
 All Classes Namespaces Files Variables Pages
_unicodeWords.js
1 
/**
 * Character ranges, written as regex source text, that are spliced into the
 * character classes below. The doubled backslashes keep the escapes literal
 * so that `RegExp` (not the string parser) interprets them.
 */
var rsAstralRange = '\\ud800-\\udfff',
    rsComboMarksRange = '\\u0300-\\u036f',
    reComboHalfMarksRange = '\\ufe20-\\ufe2f',
    rsComboSymbolsRange = '\\u20d0-\\u20ff',
    rsComboRange = rsComboMarksRange + reComboHalfMarksRange + rsComboSymbolsRange,
    rsDingbatRange = '\\u2700-\\u27bf',
    rsLowerRange = 'a-z\\xdf-\\xf6\\xf8-\\xff',
    rsMathOpRange = '\\xac\\xb1\\xd7\\xf7',
    rsNonCharRange = '\\x00-\\x2f\\x3a-\\x40\\x5b-\\x60\\x7b-\\xbf',
    rsPunctuationRange = '\\u2000-\\u206f',
    rsSpaceRange = ' \\t\\x0b\\f\\xa0\\ufeff\\n\\r\\u2028\\u2029\\u1680\\u180e\\u2000\\u2001\\u2002\\u2003\\u2004\\u2005\\u2006\\u2007\\u2008\\u2009\\u200a\\u202f\\u205f\\u3000',
    rsUpperRange = 'A-Z\\xc0-\\xd6\\xd8-\\xde',
    rsVarRange = '\\ufe0e\\ufe0f',
    // Everything treated as a word separator: math operators, ASCII/Latin-1
    // non-alphanumerics, general punctuation and whitespace.
    rsBreakRange = rsMathOpRange + rsNonCharRange + rsPunctuationRange + rsSpaceRange;

/**
 * Regex source fragments (character classes and groups) built from the
 * ranges above.
 */
var rsApos = "['\u2019]",
    rsBreak = '[' + rsBreakRange + ']',
    rsCombo = '[' + rsComboRange + ']',
    rsDigits = '\\d+',
    rsDingbat = '[' + rsDingbatRange + ']',
    rsLower = '[' + rsLowerRange + ']',
    // `rsDigits` is spliced in for its `\d`; its trailing `+` is a literal
    // inside the class and is already covered by `\x00-\x2f` in rsBreakRange.
    rsMisc = '[^' + rsAstralRange + rsBreakRange + rsDigits + rsDingbatRange + rsLowerRange + rsUpperRange + ']',
    // Surrogate-pair form of U+1F3FB-U+1F3FF (emoji skin-tone modifiers).
    rsFitz = '\\ud83c[\\udffb-\\udfff]',
    rsModifier = '(?:' + rsCombo + '|' + rsFitz + ')',
    rsNonAstral = '[^' + rsAstralRange + ']',
    // Two consecutive code points from U+1F1E6-U+1F1FF (regional indicators).
    rsRegional = '(?:\\ud83c[\\udde6-\\uddff]){2}',
    rsSurrPair = '[\\ud800-\\udbff][\\udc00-\\udfff]',
    rsUpper = '[' + rsUpperRange + ']',
    rsZWJ = '\\u200d';

/**
 * Larger regex source fragments: optional contractions, ordinals, and the
 * variation-selector / modifier / zero-width-joiner tail of an emoji.
 */
var rsMiscLower = '(?:' + rsLower + '|' + rsMisc + ')',
    rsMiscUpper = '(?:' + rsUpper + '|' + rsMisc + ')',
    rsOptContrLower = '(?:' + rsApos + '(?:d|ll|m|re|s|t|ve))?',
    rsOptContrUpper = '(?:' + rsApos + '(?:D|LL|M|RE|S|T|VE))?',
    reOptMod = rsModifier + '?',
    rsOptVar = '[' + rsVarRange + ']?',
    rsOptJoin = '(?:' + rsZWJ + '(?:' + [rsNonAstral, rsRegional, rsSurrPair].join('|') + ')' + rsOptVar + reOptMod + ')*',
    // Ordinals must end on a word boundary, so `1st` matches in "1st place"
    // but not in "1stPlace".
    rsOrdLower = '\\d*(?:(?:1st|2nd|3rd|(?![123])\\dth)\\b)',
    rsOrdUpper = '\\d*(?:(?:1ST|2ND|3RD|(?![123])\\dTH)\\b)',
    rsSeq = rsOptVar + reOptMod + rsOptJoin,
    rsEmoji = '(?:' + [rsDingbat, rsRegional, rsSurrPair].join('|') + ')' + rsSeq;

/**
 * Global regex that matches one word per hit. The alternatives are tried in
 * the order listed, so earlier (more specific) patterns win over later ones.
 */
var reUnicodeWord = RegExp([
  // Optional capital + lowercase run (+ contraction), ending before a break,
  // an uppercase letter or the end of input.
  rsUpper + '?' + rsLower + '+' + rsOptContrLower + '(?=' + [rsBreak, rsUpper, '$'].join('|') + ')',
  // Uppercase/misc run (+ contraction), ending before a break, before an
  // uppercase letter that starts a lowercase word, or at the end of input.
  rsMiscUpper + '+' + rsOptContrUpper + '(?=' + [rsBreak, rsUpper + rsMiscLower, '$'].join('|') + ')',
  rsUpper + '?' + rsMiscLower + '+' + rsOptContrLower,
  rsUpper + '+' + rsOptContrUpper,
  rsOrdUpper,
  rsOrdLower,
  rsDigits,
  rsEmoji
].join('|'), 'g');
57 
/**
 * Splits `string` into an array of its words by matching it against
 * `reUnicodeWord`.
 *
 * @private
 * @param {string} string The string to inspect.
 * @returns {Array} Returns the matched words, or an empty array when
 *  nothing in `string` matches.
 */
function unicodeWords(string) {
  // `match` yields `null` when there are no hits; normalise that to `[]`.
  return string.match(reUnicodeWord) || [];
}
68 
69 module.exports = unicodeWords;