관리-도구
편집 파일: parse.js
// parse a single path portion
import { parseClass } from './brace-expressions';

// the characters that may introduce an extglob when followed by '('
const types = new Set(['!', '?', '+', '*', '@']);
const isExtglobType = (c) => types.has(c);
// characters that indicate a start of pattern needs the "no dots" bit
const addPatternStart = new Set(['[', '.']);
const justDots = new Set(['..', '.']);
const reSpecials = new Set('().*{}+?[]^$\\!');
const regExpEscape = (s) => s.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, '\\$&');
// any single thing other than /
// don't need to escape / when using new RegExp()
const qmark = '[^/]';
// * => any number of characters
const star = qmark + '*?';

/**
 * A node in the parse tree of a single glob path portion.
 *
 * `type` is one of the extglob type characters ('!', '?', '+', '*', '@')
 * for an extglob node, or `null` for a plain sequence whose parts are
 * strings and/or nested AST nodes. The parts of an extglob node are its
 * alternatives, each of which is a `null`-typed AST.
 */
export class AST {
    type;
    #root;
    #parts = [];
    #parent;
    #parentIndex;
    #negs;
    #filledNegs = false;
    #options;

    /**
     * @param {string|null} type extglob type character, or null for a plain sequence
     * @param {AST} [parent] parent node; omit for the root
     * @param {object} [options] only used by the root; descendants share the root's options
     */
    constructor(type, parent, options = {}) {
        this.type = type;
        this.#parent = parent;
        this.#root = this.#parent ? this.#parent.#root : this;
        this.#options = this.#root === this ? options : this.#root.#options;
        // every node in a tree shares the root's list of negation nodes
        this.#negs = this.#root === this ? [] : this.#root.#negs;
        if (type === '!' && !this.#root.#filledNegs) {
            this.#negs.push(this);
        }
        // position this node will occupy once the caller pushes it into parent
        this.#parentIndex = this.#parent ? this.#parent.#parts.length : 0;
    }

    /**
     * Copy everything that follows each '!' extglob into each of that
     * extglob's alternatives. Always operates on the root and runs at most
     * once per tree.
     *
     * @returns {AST} this node
     */
    fillNegs() {
        if (this !== this.#root) {
            this.#root.fillNegs();
            return this;
        }
        if (this.#filledNegs) {
            return this;
        }
        this.#filledNegs = true;
        let n;
        while ((n = this.#negs.pop())) {
            // a malformed '!(' gets its type reset to null by the parser
            if (n.type !== '!') {
                continue;
            }
            // walk up the tree, appending everything that comes AFTER parentIndex
            let p = n;
            let pp = p.#parent;
            while (pp) {
                // only untyped (plain sequence) ancestors contribute trailing parts
                for (let i = p.#parentIndex + 1; !pp.type && i < pp.#parts.length; i++) {
                    for (const part of n.#parts) {
                        /* c8 ignore start */
                        if (typeof part === 'string') {
                            throw new Error('string part in extglob AST??');
                        }
                        /* c8 ignore stop */
                        part.copyIn(pp.#parts[i]);
                    }
                }
                p = pp;
                pp = p.#parent;
            }
        }
        return this;
    }

    /**
     * Append parts to this node. Empty strings are skipped.
     *
     * @param {...(string|AST)} parts strings, or AST nodes whose parent is this node
     * @throws {Error} if a part is neither a string nor a child AST of this node
     */
    push(...parts) {
        for (const p of parts) {
            if (p === '') {
                continue;
            }
            /* c8 ignore start */
            if (typeof p !== 'string' && !(p instanceof AST && p.#parent === this)) {
                throw new Error('invalid part: ' + p);
            }
            /* c8 ignore stop */
            this.#parts.push(p);
        }
    }

    /**
     * Array form used by JSON.stringify: the parts, preceded by the type for
     * extglob nodes. An empty array is prepended for an untyped node at the
     * start, and an empty object is appended for a node at the end that is
     * either the root or (once negations are filled) a child of a '!' node.
     *
     * @returns {Array}
     */
    toJSON() {
        const ret = this.type === null
            ? this.#parts.slice()
            : [this.type, ...this.#parts];
        if (this.isStart() && !this.type) {
            ret.unshift([]);
        }
        if (this.isEnd() &&
            (this === this.#root ||
                (this.#root.#filledNegs && this.#parent?.type === '!'))) {
            ret.push({});
        }
        return ret;
    }

    /**
     * @returns {boolean} true for the root, or for a node recorded at index 0
     *   of a parent that is itself at the start
     */
    isStart() {
        if (this.#root === this) {
            return true;
        }
        // if (this.type) return !!this.#parent?.isStart()
        if (!this.#parent?.isStart()) {
            return false;
        }
        return this.#parentIndex === 0;
    }

    /**
     * @returns {boolean} true for the root, for any child of a '!' node, for
     *   an untyped node whose parent is at the end, or for a typed node that
     *   is the last part of a parent that is at the end
     */
    isEnd() {
        if (this.#root === this) {
            return true;
        }
        if (this.#parent?.type === '!') {
            return true;
        }
        if (!this.#parent?.isEnd()) {
            return false;
        }
        if (!this.type) {
            return this.#parent?.isEnd();
        }
        return (this.#parentIndex ===
            (this.#parent ? this.#parent.#parts.length : 0) - 1);
    }

    /**
     * Append a copy of `part` to this node: strings are pushed as-is, AST
     * nodes are cloned with this node as their parent.
     *
     * @param {string|AST} part
     */
    copyIn(part) {
        if (typeof part === 'string') {
            this.push(part);
        }
        else {
            this.push(part.clone(this));
        }
    }

    /**
     * Deep-copy this node under a new parent. The copy is NOT pushed into
     * `parent`; the caller does that.
     *
     * @param {AST} parent
     * @returns {AST}
     */
    clone(parent) {
        const c = new AST(this.type, parent);
        for (const p of this.#parts) {
            c.copyIn(p);
        }
        return c;
    }

    /**
     * Recursive-descent parser that fills `ast` from `str`.
     *
     * For an untyped `ast`, parsing starts at `pos` and consumes the rest of
     * the string. For an extglob `ast`, `pos` must be the index of the
     * opening '(' (so the type character is at `pos - 1`), and parsing stops
     * after the matching ')'.
     *
     * @param {string} str the full pattern
     * @param {AST} ast node to fill
     * @param {number} pos index to start at
     * @param {object} opt options; `opt.noext` disables extglob detection at
     *   the top level
     * @returns {number} index of the first unconsumed character
     */
    static #parseAST(str, ast, pos, opt) {
        let escaping = false;
        if (ast.type === null) {
            // outside of a extglob, append until we find a start
            let i = pos;
            let acc = '';
            while (i < str.length) {
                const c = str.charAt(i++);
                // still accumulate escapes at this point, but we do ignore
                // starts that are escaped
                if (escaping || c === '\\') {
                    escaping = !escaping;
                    acc += c;
                    continue;
                }
                if (!opt.noext && isExtglobType(c) && str.charAt(i) === '(') {
                    ast.push(acc);
                    acc = '';
                    const ext = new AST(c, ast);
                    i = AST.#parseAST(str, ext, i, opt);
                    ast.push(ext);
                    continue;
                }
                acc += c;
            }
            ast.push(acc);
            return i;
        }
        // some kind of extglob, pos is at the (
        // find the next | or )
        let i = pos + 1;
        let part = new AST(null, ast);
        const parts = [];
        let acc = '';
        while (i < str.length) {
            const c = str.charAt(i++);
            // still accumulate escapes at this point, but we do ignore
            // starts that are escaped
            if (escaping || c === '\\') {
                escaping = !escaping;
                acc += c;
                continue;
            }
            if (isExtglobType(c) && str.charAt(i) === '(') {
                part.push(acc);
                acc = '';
                const ext = new AST(c, part);
                part.push(ext);
                i = AST.#parseAST(str, ext, i, opt);
                continue;
            }
            if (c === '|') {
                part.push(acc);
                acc = '';
                parts.push(part);
                part = new AST(null, ast);
                continue;
            }
            if (c === ')') {
                part.push(acc);
                acc = '';
                ast.push(...parts, part);
                return i;
            }
            acc += c;
        }
        // if we got here, it was a malformed extglob! not an extglob, but
        // maybe something else in there.
        ast.type = null;
        // pos is at the '(' and the type character sits just before it; start
        // at pos - 1 so that character is kept as literal text instead of
        // being silently dropped from the pattern.
        ast.#parts = [str.substring(pos - 1)];
        return i;
    }

    /**
     * Parse a glob path portion into an AST.
     *
     * @param {string} pattern
     * @param {object} [options]
     * @returns {AST} the root node
     */
    static fromGlob(pattern, options = {}) {
        const ast = new AST(null, undefined, options);
        AST.#parseAST(pattern, ast, 0, options);
        // NOTE(review): debug output on every call; looks like a development
        // leftover. Confirm before removing.
        console.log('parsed', pattern, JSON.stringify(ast));
        return ast;
    }

    /**
     * Build the regular expression source for this node. Calling this on the
     * root first runs fillNegs().
     *
     * @returns {string}
     * @throws {Error} if an extglob node directly contains a string part
     */
    toRegExpSource() {
        if (this.#root === this) {
            this.fillNegs();
        }
        if (!this.type) {
            const src = this.#parts
                .map(p => {
                    if (typeof p === 'string') {
                        return AST.#parseGlob(p, this.#options);
                    }
                    return p.toRegExpSource();
                })
                .join('');
            let start = '';
            if (this.isStart() && typeof this.#parts[0] === 'string') {
                // '.' and '..' cannot match unless the pattern is that exactly
                const dotTravAllowed = this.#parts.length === 1 && justDots.has(this.#parts[0]);
                if (dotTravAllowed) {
                    start = '(?:^|\\/)';
                }
                else {
                    const dotsAllowed = this.#options.dot ||
                        // no need to prevent dots if it can't match a dot, or if a sub-pattern
                        // will be preventing it anyway.
                        !addPatternStart.has(src.charAt(0));
                    start = dotsAllowed ? '(?!(?:^|\\/)\\.{1,2}(?:$|\\/))' : '(?!\\.)';
                }
            }
            let end = '';
            if (this.isEnd() &&
                (this === this.#root ||
                    (this.#root.#filledNegs && this.#parent?.type === '!'))) {
                end = '(?:$|\\/)';
            }
            return start + src + end;
        }
        // some kind of extglob
        const start = this.type === '!' ? '(?:(?!(?:' : '(?:';
        const body = this.#parts
            .map(p => {
                /* c8 ignore start */
                if (typeof p === 'string') {
                    throw new Error('string type in extglob ast??');
                }
                /* c8 ignore stop */
                return p.toRegExpSource();
            })
            .join('|');
        const close = this.type === '!'
            ? '))[^/]*?)'
            : this.type === '@'
                ? ')'
                : `)${this.type}`;
        return start + body + close;
    }

    /**
     * Translate a plain glob string (no extglobs) to regular expression
     * source.
     *
     * @param {string} glob
     * @param {object} options currently unused
     * @returns {string}
     */
    static #parseGlob(glob, options) {
        let escaping = false;
        let re = '';
        for (let i = 0; i < glob.length; i++) {
            const c = glob.charAt(i);
            if (escaping) {
                escaping = false;
                re += (reSpecials.has(c) ? '\\' : '') + c;
                continue;
            }
            if (c === '\\') {
                if (i === glob.length - 1) {
                    // trailing backslash: emit an escaped literal backslash
                    re += '\\\\';
                }
                else {
                    escaping = true;
                }
                continue;
            }
            if (c === '[') {
                // NOTE(review): parseClass also yields a "needs u flag" and a
                // "magic" value (2nd and 4th entries); nothing consumes them
                // yet, so they are not collected here.
                const [src, , consumed] = parseClass(glob, i);
                if (consumed) {
                    re += src;
                    i += consumed - 1;
                    continue;
                }
                // nothing consumed: fall through and escape the '[' literally
            }
            if (c === '*') {
                re += star;
                continue;
            }
            if (c === '?') {
                re += qmark;
                continue;
            }
            re += regExpEscape(c);
        }
        return re;
    }
}

// NOTE(review): the statements below run (and log) every time this module is
// imported; they look like development scaffolding. Confirm before removing.
const pattern = 'a@(i|w!(x|y)z+(l|m)|j)';
const ast = AST.fromGlob(pattern).fillNegs();
console.log('negged', pattern, JSON.stringify(ast));
console.log('to re src', pattern, ast.toRegExpSource());
// // the type (exttype or null for strings), and array of children tokens
//
// // append everything after a negative extglob to each of the parts
// // of the negative extglob node.  So, eg, [a, [!, x, y], z]
//
//
//
//
//
//
//
//
//
//
// const globUnescape = (s: string) => s.replace(/\\(.)/g, '$1')
// const regExpEscape = (s: string) =>
//   s.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, '\\$&')
//
// // "abc" -> { a:true, b:true, c:true }
// const charSet = (s: string) =>
//   s.split('').reduce((set: { [k: string]: boolean }, c) => {
//     set[c] = true
//     return set
//   }, {})
//
// // characters that need to be escaped in RegExp.
// const reSpecials = charSet('().*{}+?[]^$\\!')
//
// // characters that indicate we have to add the pattern start
// const addPatternStartSet = charSet('[.(')
//
// // any single thing other than /
// // don't need to escape / when using new RegExp()
// const qmark = '[^/]'
//
// // * => any number of characters
// const star = qmark + '*?'
//
// // TODO: take an offset and length, so we can sub-parse the extglobs
// const parse = (
//   options: MinimatchOptions,
//   pattern: string,
//   debug: (...a: any[]) => void
// ): false | string => {
//   assertValidPattern(pattern)
//
//   if (pattern === '') return ''
//
//   let re = ''
//   let hasMagic = false
//   let escaping = false
//   // ?
=> one single character // let uflag = false // // // . and .. never match anything that doesn't start with ., // // even when options.dot is set. However, if the pattern // // starts with ., then traversal patterns can match. // let dotTravAllowed = pattern.charAt(0) === '.' // let dotFileAllowed = options.dot || dotTravAllowed // const patternStart = () => // dotTravAllowed // ? '' // : dotFileAllowed // ? '(?!(?:^|\\/)\\.{1,2}(?:$|\\/))' // : '(?!\\.)' // const subPatternStart = (p: string) => // p.charAt(0) === '.' // ? '' // : options.dot // ? '(?!(?:^|\\/)\\.{1,2}(?:$|\\/))' // : '(?!\\.)' // // const clearStateChar = () => { // if (stateChar) { // // we had some state-tracking character // // that wasn't consumed by this pass. // switch (stateChar) { // case '*': // re += star // hasMagic = true // break // case '?': // re += qmark // hasMagic = true // break // default: // re += '\\' + stateChar // break // } // debug('clearStateChar %j %j', stateChar, re) // stateChar = false // } // } // // for ( // let i = 0, c: string; // i < pattern.length && (c = pattern.charAt(i)); // i++ // ) { // debug('%s\t%s %s %j', pattern, i, re, c) // // // skip over any that are escaped. // if (escaping) { // // completely not allowed, even escaped. // // should be impossible. // /* c8 ignore start */ // if (c === '/') { // return false // } // /* c8 ignore stop */ // // if (reSpecials[c]) { // re += '\\' // } // re += c // escaping = false // continue // } // // switch (c) { // // Should already be path-split by now. // /* c8 ignore start */ // case '/': { // return false // } // /* c8 ignore stop */ // // case '\\': // clearStateChar() // escaping = true // continue // // // the various stateChar values // // for the "extglob" stuff. // case '?': // case '*': // case '+': // case '@': // case '!': // debug('%s\t%s %s %j <-- stateChar', pattern, i, re, c) // // // if we already have a stateChar, then it means // // that there was something like ** or +? in there. 
// // Handle the stateChar, then proceed with this one. // debug('call clearStateChar %j', stateChar) // clearStateChar() // stateChar = c // // if extglob is disabled, then +(asdf|foo) isn't a thing. // // just clear the statechar *now*, rather than even diving into // // the patternList stuff. // if (options.noext) clearStateChar() // continue // // case '(': { // if (!stateChar) { // re += '\\(' // continue // } // // const plEntry: PatternListEntry = { // type: stateChar, // start: i - 1, // reStart: re.length, // open: plTypes[stateChar].open, // close: plTypes[stateChar].close, // } // debug(pattern, '\t', plEntry) // patternListStack.push(plEntry) // // negation is (?:(?!(?:js)(?:<rest>))[^/]*) // re += plEntry.open // // next entry starts with a dot maybe? // if (plEntry.start === 0 && plEntry.type !== '!') { // dotTravAllowed = true // re += subPatternStart(pattern.slice(i + 1)) // } // debug('plType %j %j', stateChar, re) // stateChar = false // continue // } // // case ')': { // const plEntry = patternListStack[patternListStack.length - 1] // if (!plEntry) { // re += '\\)' // continue // } // patternListStack.pop() // // // closing an extglob // clearStateChar() // hasMagic = true // pl = plEntry // // negation is (?:(?!js)[^/]*) // // The others are (?:<pattern>)<type> // re += pl.close // if (pl.type === '!') { // negativeLists.push(Object.assign(pl, { reEnd: re.length })) // } // continue // } // // case '|': { // const plEntry = patternListStack[patternListStack.length - 1] // if (!plEntry) { // re += '\\|' // continue // } // // clearStateChar() // re += '|' // // next subpattern can start with a dot? 
// if (plEntry.start === 0 && plEntry.type !== '!') { // dotTravAllowed = true // re += subPatternStart(pattern.slice(i + 1)) // } // continue // } // // // these are mostly the same in regexp and glob // case '[': // // swallow any state-tracking char before the [ // clearStateChar() // const [src, needUflag, consumed, magic] = parseClass(pattern, i) // if (consumed) { // re += src // uflag = uflag || needUflag // i += consumed - 1 // hasMagic = hasMagic || magic // } else { // re += '\\[' // } // continue // // case ']': // re += '\\' + c // continue // // default: // // swallow any state char that wasn't consumed // clearStateChar() // // re += regExpEscape(c) // break // } // switch // } // for // // // handle the case where we had a +( thing at the *end* // // of the pattern. // // each pattern list stack adds 3 chars, and we need to go through // // and escape any | chars that were passed through as-is for the regexp. // // Go through and escape them, taking care not to double-escape any // // | chars that were already escaped. // for (pl = patternListStack.pop(); pl; pl = patternListStack.pop()) { // let tail: string // tail = re.slice(pl.reStart + pl.open.length) // debug(pattern, 'setting tail', re, pl) // // maybe some even number of \, then maybe 1 \, followed by a | // tail = tail.replace(/((?:\\{2}){0,64})(\\?)\|/g, (_, $1, $2) => { // if (!$2) { // // the | isn't already escaped, so escape it. // $2 = '\\' // // should already be done // /* c8 ignore start */ // } // /* c8 ignore stop */ // // // need to escape all those slashes *again*, without escaping the // // one that we need for escaping the | character. As it works out, // // escaping an even number of slashes can be done by simply repeating // // it exactly after itself. That's why this trick works. // // // // I am sorry that you have to see this. // return $1 + $1 + $2 + '|' // }) // // debug('tail=%j\n %s', tail, tail, pl, re) // const t = pl.type === '*' ? star : pl.type === '?' ? 
qmark : '\\' + pl.type // // hasMagic = true // re = re.slice(0, pl.reStart) + t + '\\(' + tail // } // // // handle trailing things that only matter at the very end. // clearStateChar() // if (escaping) { // // trailing \\ // re += '\\\\' // } // // // only need to apply the nodot start if the re starts with // // something that could conceivably capture a dot // const addPatternStart = addPatternStartSet[re.charAt(0)] // // // Hack to work around lack of negative lookbehind in JS // // A pattern like: *.!(x).!(y|z) needs to ensure that a name // // like 'a.xyz.yz' doesn't match. So, the first negative // // lookahead, has to look ALL the way ahead, to the end of // // the pattern. // for (let n = negativeLists.length - 1; n > -1; n--) { // const nl = negativeLists[n] // // const nlBefore = re.slice(0, nl.reStart) // const nlFirst = re.slice(nl.reStart, nl.reEnd - 8) // let nlAfter = re.slice(nl.reEnd) // const nlLast = re.slice(nl.reEnd - 8, nl.reEnd) + nlAfter // // // Handle nested stuff like *(*.js|!(*.json)), where open parens // // mean that we should *not* include the ) in the bit that is considered // // "after" the negated section. // const closeParensBefore = nlBefore.split(')').length // const openParensBefore = nlBefore.split('(').length - closeParensBefore // let cleanAfter = nlAfter // for (let i = 0; i < openParensBefore; i++) { // cleanAfter = cleanAfter.replace(/\)[+*?]?/, '') // } // nlAfter = cleanAfter // // const dollar = nlAfter === '' ? '(?:$|\\/)' : '' // // re = nlBefore + nlFirst + nlAfter + dollar + nlLast // } // // // if the re is not "" at this point, then we need to make sure // // it doesn't match against an empty path part. // // Otherwise a/* will match a/, which it should not. 
// if (re !== '' && hasMagic) { // re = '(?=.)' + re // } // // if (addPatternStart) { // re = patternStart() + re // } // // // if it's nocase, and the lcase/uppercase don't match, it's magic // if (options.nocase && !hasMagic && !options.nocaseMagicOnly) { // hasMagic = pattern.toUpperCase() !== pattern.toLowerCase() // } // // // skip the regexp for non-magical patterns // // unescape anything in it, though, so that it'll be // // an exact match against a file etc. // if (!hasMagic) { // return globUnescape(re) // } // // return re // } //# sourceMappingURL=parse.js.map