{"_id":"graphemer","_rev":"2975065","name":"graphemer","description":"A JavaScript library that breaks strings into their individual user-perceived characters (including emojis!)","dist-tags":{"latest":"1.4.0"},"maintainers":[{"name":"mattpauldavies","email":""}],"time":{"modified":"2023-06-15T08:26:26.000Z","created":"2020-09-14T15:37:51.224Z","1.4.0":"2022-09-19T10:19:34.014Z","1.3.0":"2021-12-13T16:14:47.451Z","1.2.0":"2021-01-29T10:28:35.373Z","1.1.1":"2020-09-14T17:40:29.304Z","1.1.0":"2020-09-14T17:01:57.552Z","1.0.0":"2020-09-14T15:37:51.224Z"},"users":{},"author":{"name":"Matt Davies","email":"matt@filament.so","url":"https://github.com/mattpauldavies"},"repository":{"type":"git","url":"git+https://github.com/flmnt/graphemer.git"},"versions":{"1.4.0":{"name":"graphemer","version":"1.4.0","description":"A JavaScript library that breaks strings into their individual user-perceived characters (including emojis!)","homepage":"https://github.com/flmnt/graphemer","author":{"name":"Matt Davies","email":"matt@filament.so","url":"https://github.com/mattpauldavies"},"contributors":[{"name":"Orlin Georgiev","url":"https://github.com/orling"},{"name":"Huáng Jùnliàng","url":"https://github.com/JLHwung"}],"main":"./lib/index.js","types":"./lib/index.d.ts","license":"MIT","keywords":["utf-8","strings","emoji","split"],"scripts":{"prepublishOnly":"npm run build","build":"tsc --project tsconfig.json","pretest":"npm run build","test":"ts-node node_modules/tape/bin/tape tests/**.ts","prettier:check":"prettier --check .","prettier:fix":"prettier --write ."},"repository":{"type":"git","url":"git+https://github.com/flmnt/graphemer.git"},"bugs":{"url":"https://github.com/flmnt/graphemer/issues"},"devDependencies":{"@types/tape":"^4.13.0","husky":"^4.3.0","lint-staged":"^10.3.0","prettier":"^2.1.1","tape":"^4.6.3","ts-node":"^9.0.0","typescript":"^4.0.2"},"husky":{"hooks":{"pre-commit":"lint-staged","pre-push":"npm test"}},"lint-staged":{"*.{js,ts,md,json}":"prettier --write"},"gitHead":"ed86fe916dad2d90407c66fc6182d3c40f7fc827","_id":"graphemer@1.4.0","_nodeVersion":"12.22.12","_npmVersion":"6.14.16","dist":{"shasum":"fb2f1d55e0e3a1849aeffc90c4fa0dd53a0e66c6","size":58380,"noattachment":false,"key":"/graphemer/-/graphemer-1.4.0.tgz","tarball":"http://registry.cnpm.dingdandao.com/graphemer/download/graphemer-1.4.0.tgz"},"_npmUser":{"name":"mattpauldavies","email":"matt@teamdavies.co.uk"},"directories":{},"maintainers":[{"name":"mattpauldavies","email":""}],"_npmOperationalInternal":{"host":"s3://npm-registry-packages","tmp":"tmp/graphemer_1.4.0_1663582773867_0.23940933334028247"},"_hasShrinkwrap":false,"_cnpmcore_publish_time":"2022-09-19T10:44:22.773Z","publish_time":1663582774014,"_cnpm_publish_time":1663582774014},"1.3.0":{"name":"graphemer","version":"1.3.0","description":"A JavaScript library that breaks strings into their individual user-perceived characters (including emojis!)","homepage":"https://github.com/flmnt/graphemer","author":{"name":"Matt Davies","email":"matt@filament.so","url":"https://github.com/mattpauldavies"},"contributors":[{"name":"Orlin Georgiev","url":"https://github.com/orling"},{"name":"Huáng Jùnliàng","url":"https://github.com/JLHwung"}],"main":"./lib/index.js","types":"./lib/index.d.ts","license":"MIT","keywords":["utf-8","strings","emoji","split"],"scripts":{"prepublishOnly":"npm run build","build":"tsc --project tsconfig.json","pretest":"npm run build","test":"ts-node node_modules/tape/bin/tape tests/**.ts","prettier:check":"prettier --check .","prettier:fix":"prettier --write ."},"repository":{"type":"git","url":"git+https://github.com/flmnt/graphemer.git"},"bugs":{"url":"https://github.com/flmnt/graphemer/issues"},"devDependencies":{"@types/tape":"^4.13.0","husky":"^4.3.0","lint-staged":"^10.3.0","prettier":"^2.1.1","tape":"^4.6.3","ts-node":"^9.0.0","typescript":"^4.0.2"},"husky":{"hooks":{"pre-commit":"lint-staged","pre-push":"npm test"}},"lint-staged":{"*.{js,ts,md,json}":"prettier --write"},"gitHead":"6148859bb06236f07168f98d8eac195fe5d2b890","_id":"graphemer@1.3.0","_nodeVersion":"12.22.7","_npmVersion":"6.14.15","dist":{"shasum":"da05f48272d2840ff8b2d89152be5122f64b7402","size":57517,"noattachment":false,"key":"/graphemer/-/graphemer-1.3.0.tgz","tarball":"http://registry.cnpm.dingdandao.com/graphemer/download/graphemer-1.3.0.tgz"},"_npmUser":{"name":"mattpauldavies","email":"matt@teamdavies.co.uk"},"directories":{},"maintainers":[{"name":"mattpauldavies","email":""}],"_npmOperationalInternal":{"host":"s3://npm-registry-packages","tmp":"tmp/graphemer_1.3.0_1639412087227_0.589658191669399"},"_hasShrinkwrap":false,"_cnpmcore_publish_time":"2021-12-26T21:59:24.499Z","publish_time":1639412087451,"_cnpm_publish_time":1639412087451},"1.2.0":{"name":"graphemer","version":"1.2.0","description":"A JavaScript library that breaks strings into their individual user-perceived characters (including emojis!)","homepage":"https://github.com/flmnt/graphemer","author":{"name":"Matt Davies","email":"matt@filament.so","url":"https://github.com/mattpauldavies"},"contributors":[{"name":"Orlin Georgiev","url":"https://github.com/orling"},{"name":"Huáng Jùnliàng","url":"https://github.com/JLHwung"}],"main":"./lib/index.js","types":"./lib/index.d.ts","license":"MIT","keywords":["utf-8","strings","emoji","split"],"scripts":{"prepublishOnly":"npm run build","build":"tsc --project tsconfig.json","pretest":"npm run build","test":"ts-node node_modules/tape/bin/tape tests/**.ts","prettier:check":"prettier --check .","prettier:fix":"prettier --write ."},"repository":{"type":"git","url":"git+https://github.com/flmnt/graphemer.git"},"bugs":{"url":"https://github.com/flmnt/graphemer/issues"},"dependencies":{},"devDependencies":{"@types/tape":"^4.13.0","husky":"^4.3.0","lint-staged":"^10.3.0","prettier":"^2.1.1","tape":"^4.6.3","ts-node":"^9.0.0","typescript":"^4.0.2"},"husky":{"hooks":{"pre-commit":"lint-staged","pre-push":"npm test"}},"lint-staged":{"*.{js,ts,md,json}":"prettier --write"},"gitHead":"50c67f64bd47a6cb76f3ed7d95bbbb74d1dee22a","_id":"graphemer@1.2.0","_nodeVersion":"12.20.1","_npmVersion":"6.14.10","dist":{"shasum":"6984f711609f1a16b2ba09c061f2be2a330eb488","size":56745,"noattachment":false,"key":"/graphemer/-/graphemer-1.2.0.tgz","tarball":"http://registry.cnpm.dingdandao.com/graphemer/download/graphemer-1.2.0.tgz"},"_npmUser":{"name":"mattpauldavies","email":"matt@teamdavies.co.uk"},"directories":{},"maintainers":[{"name":"mattpauldavies","email":""}],"_npmOperationalInternal":{"host":"s3://npm-registry-packages","tmp":"tmp/graphemer_1.2.0_1611916115243_0.10838331884902108"},"_hasShrinkwrap":false,"_cnpmcore_publish_time":"2021-12-26T21:59:23.429Z","publish_time":1611916115373,"_cnpm_publish_time":1611916115373},"1.1.1":{"name":"graphemer","version":"1.1.1","description":"A JavaScript library that breaks strings into their individual user-perceived characters (including emojis!)","homepage":"https://github.com/flmnt/graphemer","author":{"name":"Matt Davies","email":"matt@filament.so","url":"https://github.com/mattpauldavies"},"contributors":[{"name":"Orlin Georgiev","url":"https://github.com/orling"},{"name":"Huáng Jùnliàng","url":"https://github.com/JLHwung"}],"main":"./lib/index.js","types":"./lib/index.d.ts","license":"MIT","keywords":["utf-8","strings","emoji","split"],"scripts":{"prepublishOnly":"npm run build","build":"tsc --project tsconfig.json","pretest":"npm run build","test":"ts-node node_modules/tape/bin/tape tests/**.ts","prettier:check":"prettier --check .","prettier:fix":"prettier --write ."},"repository":{"type":"git","url":"git+https://github.com/flmnt/graphemer.git"},"bugs":{"url":"https://github.com/flmnt/graphemer/issues"},"dependencies":{},"devDependencies":{"@types/tape":"^4.13.0","husky":"^4.3.0","lint-staged":"^10.3.0","prettier":"^2.1.1","tape":"^4.6.3","ts-node":"^9.0.0","typescript":"^4.0.2"},"husky":{"hooks":{"pre-commit":"lint-staged","pre-push":"npm test"}},"lint-staged":{"*.{js,ts,md,json}":"prettier --write"},"gitHead":"5170b7f7561f9ce88cb5405633bfb4ed63b29d6f","_id":"graphemer@1.1.1","_nodeVersion":"12.18.3","_npmVersion":"6.14.6","dist":{"shasum":"be2aa7ace23c7fdf263f1d34d83ae19a091d1d12","size":42042,"noattachment":false,"key":"/graphemer/-/graphemer-1.1.1.tgz","tarball":"http://registry.cnpm.dingdandao.com/graphemer/download/graphemer-1.1.1.tgz"},"maintainers":[{"name":"mattpauldavies","email":""}],"_npmUser":{"name":"mattpauldavies","email":"matt@teamdavies.co.uk"},"directories":{},"_npmOperationalInternal":{"host":"s3://npm-registry-packages","tmp":"tmp/graphemer_1.1.1_1600105229161_0.484242751424802"},"_hasShrinkwrap":false,"_cnpmcore_publish_time":"2021-12-26T21:59:20.859Z","publish_time":1600105229304,"_cnpm_publish_time":1600105229304},"1.1.0":{"name":"graphemer","version":"1.1.0","description":"A JavaScript library that breaks strings into their individual user-perceived characters (including emojis!)","homepage":"https://github.com/flmnt/graphemer","author":{"name":"Matt Davies","email":"matt@filament.so","url":"https://github.com/mattpauldavies"},"contributors":[{"name":"Orlin Georgiev","url":"https://github.com/orling"},{"name":"Huáng Jùnliàng","url":"https://github.com/JLHwung"}],"main":"./lib/index.js","types":"./lib/index.d.ts","license":"MIT","keywords":["utf-8","strings","emoji","split"],"scripts":{"prepublishOnly":"npm run build","build":"tsc --project tsconfig.json","pretest":"npm run build","test":"ts-node node_modules/tape/bin/tape tests/**.ts","prettier:check":"prettier --check .","prettier:fix":"prettier --write ."},"repository":{"type":"git","url":"git+https://github.com/flmnt/graphemer.git"},"bugs":{"url":"https://github.com/flmnt/graphemer/issues"},"dependencies":{},"devDependencies":{"@types/tape":"^4.13.0","husky":"^4.3.0","lint-staged":"^10.3.0","prettier":"^2.1.1","tape":"^4.6.3","ts-node":"^9.0.0","typescript":"^4.0.2"},"husky":{"hooks":{"pre-commit":"lint-staged","pre-push":"npm test"}},"lint-staged":{"*.{js,ts,md,json}":"prettier --write"},"gitHead":"24b6c49c7e24af70a261cf8167d5dec7c9c04641","_id":"graphemer@1.1.0","_nodeVersion":"12.18.3","_npmVersion":"6.14.6","dist":{"shasum":"3f56689dd3207f348a438b14e6ffe5f02f26e623","size":4170,"noattachment":false,"key":"/graphemer/-/graphemer-1.1.0.tgz","tarball":"http://registry.cnpm.dingdandao.com/graphemer/download/graphemer-1.1.0.tgz"},"maintainers":[{"name":"mattpauldavies","email":""}],"_npmUser":{"name":"mattpauldavies","email":"matt@teamdavies.co.uk"},"directories":{},"_npmOperationalInternal":{"host":"s3://npm-registry-packages","tmp":"tmp/graphemer_1.1.0_1600102917378_0.5915635240249841"},"_hasShrinkwrap":false,"_cnpmcore_publish_time":"2021-12-26T21:59:19.791Z","publish_time":1600102917552,"_cnpm_publish_time":1600102917552},"1.0.0":{"name":"graphemer","version":"1.0.0","description":"A JavaScript library that breaks strings into their individual user-perceived characters (including emojis!)","homepage":"https://github.com/flmnt/graphemer","author":{"name":"Matt Davies","email":"matt@filament.so","url":"https://github.com/mattpauldavies"},"contributors":[{"name":"Orlin Georgiev","url":"https://github.com/orling"},{"name":"Huáng Jùnliàng","url":"https://github.com/JLHwung"}],"main":"./lib/index.js","types":"./lib/index.d.ts","license":"MIT","keywords":["utf-8","strings","emoji","split"],"scripts":{"prepublishOnly":"npm run build","build":"tsc --project tsconfig.json","pretest":"npm run build","test":"ts-node node_modules/tape/bin/tape tests/**.ts","prettier:check":"prettier --check .","prettier:fix":"prettier --write ."},"repository":{"type":"git","url":"git+https://github.com/flmnt/graphemer.git"},"bugs":{"url":"https://github.com/flmnt/graphemer/issues"},"dependencies":{},"devDependencies":{"@types/tape":"^4.13.0","husky":"^4.3.0","lint-staged":"^10.3.0","prettier":"^2.1.1","tape":"^4.6.3","ts-node":"^9.0.0","typescript":"^4.0.2"},"husky":{"hooks":{"pre-commit":"lint-staged","pre-push":"npm test"}},"lint-staged":{"*.{js,ts,md,json}":"prettier --write"},"gitHead":"aede0bdc9c06a27c4a69983ff3c6b3ebd6d0f256","_id":"graphemer@1.0.0","_nodeVersion":"12.18.3","_npmVersion":"6.14.6","dist":{"shasum":"3c7eeab1a7b5300a4647b688b599122796ddcd1b","size":4127,"noattachment":false,"key":"/graphemer/-/graphemer-1.0.0.tgz","tarball":"http://registry.cnpm.dingdandao.com/graphemer/download/graphemer-1.0.0.tgz"},"maintainers":[{"name":"mattpauldavies","email":""}],"_npmUser":{"name":"mattpauldavies","email":"matt@teamdavies.co.uk"},"directories":{},"_npmOperationalInternal":{"host":"s3://npm-registry-packages","tmp":"tmp/graphemer_1.0.0_1600097871094_0.009447422511954517"},"_hasShrinkwrap":false,"_cnpmcore_publish_time":"2021-12-26T21:59:18.891Z","publish_time":1600097871224,"_cnpm_publish_time":1600097871224}},"readme":"# Graphemer: Unicode Character Splitter ????\n\n## Introduction\n\nThis library continues the work of [Grapheme Splitter](https://github.com/orling/grapheme-splitter) and supports the following unicode versions:\n\n- Unicode 15 and below `[v1.4.0]`\n- Unicode 14 and below `[v1.3.0]`\n- Unicode 13 and below `[v1.1.0]`\n- Unicode 11 and below `[v1.0.0]` (Unicode 10 supported by `grapheme-splitter`)\n\nIn JavaScript there is not always a one-to-one relationship between string characters and what a user would call a separate visual \"letter\". Some symbols are represented by several characters. This can cause issues when splitting strings and inadvertently cutting a multi-char letter in half, or when you need the actual number of letters in a string.\n\nFor example, emoji characters like \"????\",\"????\",\"????\",\"????\" and \"????\" are represented by two JavaScript characters each (high surrogate and low surrogate). That is,\n\n```javascript\n'????'.length == 2;\n```\n\nThe combined emoji are even longer:\n\n```javascript\n'????️‍????'.length == 6;\n```\n\nWhat's more, some languages often include combining marks - characters that are used to modify the letters before them. Common examples are the German letter ü and the Spanish letter ñ. Sometimes they can be represented alternatively both as a single character and as a letter + combining mark, with both forms equally valid:\n\n```javascript\nvar two = 'ñ'; // unnormalized two-char n+◌̃, i.e. \"\\u006E\\u0303\";\nvar one = 'ñ'; // normalized single-char, i.e. \"\\u00F1\"\n\nconsole.log(one != two); // prints 'true'\n```\n\nUnicode normalization, as performed by the popular punycode.js library or ECMAScript 6's String.normalize, can **sometimes** fix those differences and turn two-char sequences into single characters. But it is **not** enough in all cases. Some languages like Hindi make extensive use of combining marks on their letters, that have no dedicated single-codepoint Unicode sequences, due to the sheer number of possible combinations.\nFor example, the Hindi word \"अनुच्छेद\" is comprised of 5 letters and 3 combining marks:\n\nअ + न + ु + च + ् + छ + े + द\n\nwhich is in fact just 5 user-perceived letters:\n\nअ + नु + च् + छे + द\n\nand which Unicode normalization would not combine properly.\nThere are also the unusual letter+combining mark combinations which have no dedicated Unicode codepoint. The string Z͑ͫ̓ͪ̂ͫ̽͏̴̙̤̞͉͚̯̞̠͍A̴̵̜̰͔ͫ͗͢L̠ͨͧͩ͘G̴̻͈͍͔̹̑͗̎̅͛́Ǫ̵̹̻̝̳͂̌̌͘ obviously has 5 separate letters, but is in fact comprised of 58 JavaScript characters, most of which are combining marks.\n\nEnter the `graphemer` library. It can be used to properly split JavaScript strings into what a human user would call separate letters (or \"extended grapheme clusters\" in Unicode terminology), no matter what their internal representation is. It is an implementation on the [Default Grapheme Cluster Boundary](http://unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table) of [UAX #29](http://www.unicode.org/reports/tr29/).\n\n## Installation\n\nInstall `graphemer` using the NPM command below:\n\n```\n$ npm i graphemer\n```\n\n## Usage\n\nIf you're using [Typescript](https://www.typescriptlang.org/) or a compiler like [Babel](https://babeljs.io/) (or something like Create React App) things are pretty simple; just import, initialize and use!\n\n```javascript\nimport Graphemer from 'graphemer';\n\nconst splitter = new Graphemer();\n\n// split the string to an array of grapheme clusters (one string each)\nconst graphemes = splitter.splitGraphemes(string);\n\n// iterate the string to an iterable iterator of grapheme clusters (one string each)\nconst graphemeIterator = splitter.iterateGraphemes(string);\n\n// or do this if you just need their number\nconst graphemeCount = splitter.countGraphemes(string);\n```\n\nIf you're using vanilla Node you can use the `require()` method.\n\n```javascript\nconst Graphemer = require('graphemer').default;\n\nconst splitter = new Graphemer();\n\nconst graphemes = splitter.splitGraphemes(string);\n```\n\n## Examples\n\n```javascript\nimport Graphemer from 'graphemer';\n\nconst splitter = new Graphemer();\n\n// plain latin alphabet - nothing spectacular\nsplitter.splitGraphemes('abcd'); // returns [\"a\", \"b\", \"c\", \"d\"]\n\n// two-char emojis and six-char combined emoji\nsplitter.splitGraphemes('????????????????????????️‍????'); // returns [\"????\",\"????\",\"????\",\"????\",\"????\",\"????️‍????\"]\n\n// diacritics as combining marks, 10 JavaScript chars\nsplitter.splitGraphemes('Ĺo͂řȩm̅'); // returns [\"Ĺ\",\"o͂\",\"ř\",\"ȩ\",\"m̅\"]\n\n// individual Korean characters (Jamo), 4 JavaScript chars\nsplitter.splitGraphemes('뎌쉐'); // returns [\"뎌\",\"쉐\"]\n\n// Hindi text with combining marks, 8 JavaScript chars\nsplitter.splitGraphemes('अनुच्छेद'); // returns [\"अ\",\"नु\",\"च्\",\"छे\",\"द\"]\n\n// demonic multiple combining marks, 75 JavaScript chars\nsplitter.splitGraphemes('Z͑ͫ̓ͪ̂ͫ̽͏̴̙̤̞͉͚̯̞̠͍A̴̵̜̰͔ͫ͗͢L̠ͨͧͩ͘G̴̻͈͍͔̹̑͗̎̅͛́Ǫ̵̹̻̝̳͂̌̌͘!͖̬̰̙̗̿̋ͥͥ̂ͣ̐́́͜͞'); // returns [\"Z͑ͫ̓ͪ̂ͫ̽͏̴̙̤̞͉͚̯̞̠͍\",\"A̴̵̜̰͔ͫ͗͢\",\"L̠ͨͧͩ͘\",\"G̴̻͈͍͔̹̑͗̎̅͛́\",\"Ǫ̵̹̻̝̳͂̌̌͘\",\"!͖̬̰̙̗̿̋ͥͥ̂ͣ̐́́͜͞\"]\n```\n\n## TypeScript\n\nGraphemer is built with TypeScript and, of course, includes type declarations.\n\n```javascript\nimport Graphemer from 'graphemer';\n\nconst splitter = new Graphemer();\n\nconst split: string[] = splitter.splitGraphemes('Z͑ͫ̓ͪ̂ͫ̽͏̴̙̤̞͉͚̯̞̠͍A̴̵̜̰͔ͫ͗͢L̠ͨͧͩ͘G̴̻͈͍͔̹̑͗̎̅͛́Ǫ̵̹̻̝̳͂̌̌͘!͖̬̰̙̗̿̋ͥͥ̂ͣ̐́́͜͞');\n```\n\n## Contributing\n\nSee [Contribution Guide](./CONTRIBUTING.md).\n\n## Acknowledgements\n\nThis library is a fork of the incredible work done by Orlin Georgiev and Huáng Jùnliàng at https://github.com/orling/grapheme-splitter.\n\nThe original library was heavily influenced by Devon Govett's excellent [grapheme-breaker](https://github.com/devongovett/grapheme-breaker) CoffeeScript library.\n","_attachments":{},"homepage":"https://github.com/flmnt/graphemer","bugs":{"url":"https://github.com/flmnt/graphemer/issues"},"license":"MIT"}