Ngurai (ngooh-rai), which means "breaking down" or "describing" in Javanese, is a small, highly customizable library for parsing and tokenizing strings line by line.
npm i nguraijs
const { Ngurai } = require('nguraijs')

// Configure the tokenizer: built-in categories plus user-defined token types.
const urx = new Ngurai({
  commentPrefixes: [' *', '/*'],
  keywords: ['const', 'return', 'function'],
  punctuation: ['='],
  custom: {
    // custom token types: each key becomes the `type` of the tokens it matches
    variable: ['tx'],
    comment: [' */'] // custom patterns may be any character sequence, not only words
  }
})

// The input is spelled out line by line so that the leading spaces and the
// whitespace-only line stay visible; the output below depends on them.
const input = [
  '/* it should',
  ' * parse nothing',
  ' */',
  ' ',
  "const tx = /* comments */ 'hello, world!'"
].join('\n')

console.log(urx.process(input))
Output:
[
[{ type: 'comment', value: '/* it should', position: 0 }],
[{ type: 'comment', value: ' * parse nothing', position: 0 }],
[{ type: 'comment', value: ' */', position: 0 }],
[{ type: 'space', value: ' ', position: 0 }],
[
{ type: 'keyword', value: 'const', position: 0 },
{ type: 'space', value: ' ', position: 5 },
{ type: 'variable', value: 'tx', position: 6 },
{ type: 'space', value: ' ', position: 8 },
{ type: 'punctuation', value: '=', position: 9 },
{ type: 'space', value: ' ', position: 10 },
{ type: 'comment', value: '/* comments */', position: 11 },
{ type: 'space', value: ' ', position: 25 },
{ type: 'string', value: "'hello, world!'", position: 26 }
]
]
nguraijs
exports only Ngurai,
as both a named export and the default export:
// Sketch of the package's public surface; the class body is elided here.
export class Ngurai {
/* ... */
}
export default Ngurai // the default export is only available in the ESM build
const { Ngurai } = require('nguraijs')
import { Ngurai } from 'nguraijs'
<!-- The IIFE build exposes the library as the global `__nguraijs__`.
     Replace `latest` with a fixed version number to pin the build you load. -->
<script src="https://cdn.jsdelivr.net/npm/nguraijs@latest/dist/index.iife.js"></script>
<script>
  const { Ngurai } = __nguraijs__
</script>
/**
 * Options accepted by the `Ngurai` constructor. Every field is optional;
 * the constructor substitutes a default for any field that is left out.
 */
interface TokenizerConfig {
/** Words reported with type `'keyword'`. Default: empty list. */
keywords?: string[]
/** Symbols reported with type `'punctuation'`. Default: empty list. */
punctuation?: string[]
/** Operator symbols. Default: empty list. NOTE(review): presumably reported as operator tokens — not shown in the examples, confirm. */
operators?: string[]
/** Quote characters that delimit a string token. Default: double quote, single quote and backtick. */
stringDelimiters?: string[]
/** Pattern for numeric literals. The default accepts digits with an optional fraction and exponent. */
numberRegex?: RegExp
/** Pattern for identifiers. The default accepts a letter, `_` or `$` followed by letters, digits, `_` or `$`. */
identifierRegex?: RegExp
/** Pattern for whitespace runs. The default accepts spaces and tabs. */
whitespaceRegex?: RegExp
/** Strings that start a comment. Default: the line-comment marker and the block-comment opener. */
commentPrefixes?: string[]
/**
 * Strings that end a comment. Default: an empty string and the block-comment closer.
 * NOTE(review): presumably paired by index with `commentPrefixes` — confirm.
 */
commentSuffixes?: string[]
/** User-defined token types: each key is used as the token `type`, each value lists the strings or patterns to match. */
custom?: Record<string, (string | RegExp)[]>
/** Plugins supplied at construction time. Default: empty list. */
plugins?: TokenizerPlugin[]
/** Default: `false`. NOTE(review): presumably suppresses tokens that match no rule — confirm. */
noUnknownToken?: boolean
/** Default: `false`. NOTE(review): presumably omits `'space'` tokens from the output — confirm. */
noSpace?: boolean
/** Default: `false`. NOTE(review): presumably restricts matching to the `custom` rules — confirm. */
customOnly?: boolean
}
/** Extension hook that can be passed through the `plugins` option or to `registerPlugin`. */
interface TokenizerPlugin {
/** Name identifying the plugin. */
name: string
/**
 * Receives an input string and a position and returns a token or `null`.
 * NOTE(review): `null` presumably means "no match at this position" — confirm.
 */
process: (input: string, position: number) => Token | null
}
/** A single token produced by the tokenizer. */
interface Token {
/** Token category, e.g. `'keyword'`, `'space'`, `'punctuation'`, `'comment'`, `'string'`, or a key of the `custom` option. */
type: string
/** The source text covered by the token. */
value: string
/** Zero-based character offset of the token within its own line. */
position: number
}
/**
 * Tokenizer class. This excerpt shows only how the constructor resolves
 * the user-supplied options into a complete configuration.
 */
export class Ngurai {
  /** Fully populated configuration: every option holds either the caller's value or its default. */
  private config: Required<TokenizerConfig>

  /**
   * @param config - Optional overrides. Any option that is missing (or falsy)
   *   is replaced by its default.
   */
  constructor(config: TokenizerConfig = {}) {
    const {
      keywords,
      punctuation,
      operators,
      stringDelimiters,
      numberRegex,
      identifierRegex,
      whitespaceRegex,
      commentPrefixes,
      commentSuffixes,
      custom,
      plugins,
      noUnknownToken,
      noSpace,
      customOnly
    } = config

    this.config = {
      // Vocabularies start out empty unless the caller provides them.
      keywords: keywords ? keywords : [],
      punctuation: punctuation ? punctuation : [],
      operators: operators ? operators : [],

      // Lexical defaults: quote characters and the built-in patterns.
      stringDelimiters: stringDelimiters ? stringDelimiters : ['"', "'", '`'],
      numberRegex: numberRegex ? numberRegex : /^\d+(\.\d+)?([eE][+-]?\d+)?/,
      identifierRegex: identifierRegex ? identifierRegex : /^[a-zA-Z_$][a-zA-Z0-9_$]*/,
      whitespaceRegex: whitespaceRegex ? whitespaceRegex : /^[ \t]+/,

      // Comment markers.
      commentPrefixes: commentPrefixes ? commentPrefixes : ['//', '/*'],
      commentSuffixes: commentSuffixes ? commentSuffixes : ['', '*/'],

      // Extension points.
      custom: custom ? custom : {},
      plugins: plugins ? plugins : [],

      // Behaviour switches, all off by default.
      noUnknownToken: noUnknownToken ? noUnknownToken : false,
      noSpace: noSpace ? noSpace : false,
      customOnly: customOnly ? customOnly : false
    }
  }
}
Registers a new plugin with NguraiJS.
/** Registers `plugin` with this tokenizer instance. The body is elided in this signature listing. */
public registerPlugin(plugin: TokenizerPlugin) {}
Processes (tokenizes) the input string and returns the resulting tokens.
/**
 * Tokenizes `input` and returns an array of token arrays; in the sample
 * output of this document there is one inner array per input line.
 * The body is elided in this signature listing.
 */
public process(input: string): Token[][] {}
MIT © 2025 NOuSantx