// Tholp's bespoke website generator
1use mlua::ObjectLike;
2
3use super::DELIMITERS;
4use crate::{
5 console::error_skid,
6 process_skid,
7 project::Project,
8 types::{SkidContext, Token},
9};
10
//TODO: There's a couple of functions that are still written as if tokens were strings, not chars;
// they work fine for now but they may need to be changed later
13
14pub fn collect_arguments(
15 tokens: &[Token],
16 proj_context: &mut Project,
17 skid_context: &mut SkidContext,
18) -> Option<(Vec<String>, usize)> {
19 // Returns arguments vec and number of tokens to be consumed
20 //let mut output = Vec::new();
21
22 let mut quoted: bool = false;
23 let mut escaped: bool = false;
24 let mut entered: bool = false;
25 let mut arg = "".to_string();
26 let mut args: Vec<String> = Vec::new();
27
28 let mut consumed = 0;
29 let mut exited_cleanly = false;
30
31 while consumed < tokens.len() {
32 let c = tokens[consumed].contents;
33
34 consumed += 1;
35 if c.is_whitespace() && !entered {
36 continue;
37 }
38
39 if !entered && c == '(' {
40 entered = true;
41 continue;
42 }
43
44 if !entered {
45 break;
46 }
47
48 if !quoted && c == ')' {
49 exited_cleanly = true;
50 if !arg.is_empty() {
51 args.push(arg.clone());
52 arg.clear();
53 }
54 break;
55 }
56
57 if c == '\"' && !escaped {
58 quoted = !quoted;
59 // either fucked or empty string
60 if !quoted && arg.len() == 0 {
61 args.push("".into());
62 }
63
64 continue;
65 }
66
67 if c == '\\' && !escaped {
68 escaped = true;
69 continue;
70 }
71
72 if c == '$' && !escaped && !quoted {
73 let ret = find_and_execute_lua_block(&tokens[consumed - 1..], proj_context, skid_context);
74 if ret.is_some() {
75 let (result, lua_consumed) = ret.unwrap();
76 consumed += std::cmp::max(lua_consumed, 1) - 1;
77
78 for t in &result {
79 arg.push(t.contents);
80 }
81 }
82 else {
83 arg.push('$');
84 }
85 continue;
86 }
87
88 if c.is_whitespace() && !quoted {
89 if !arg.is_empty() {
90 args.push(arg.clone());
91 arg.clear();
92 }
93 continue;
94 }
95 arg.push(c);
96 }
97
98 if !entered || !exited_cleanly {
99 return None;
100 }
101 return Some((args, consumed));
102}
103
104pub fn collect_block(tokens: &[Token]) -> Option<(Vec<Token>, usize)> {
105 let mut entered = false;
106 let mut tokens_consumed: usize = 0;
107 let mut entering_bracket_count = 0;
108 let mut exiting_bracket_count = 0;
109 let mut scope_count = 0; //incremented by '{{{', decremented by '}}}'
110 let mut escaped = false;
111
112 let mut block: Vec<Token> = Vec::new();
113
114 // We dont really care about doing anything that in the block right now
115 // maybe have the Token struct contain scope level later?
116 let mut escaped_tok: Token = Token::new('\\', 0, 0);
117 for tok in tokens {
118 tokens_consumed += 1;
119 if !entered {
120 if tok.contents.is_whitespace() {
121 continue;
122 }
123 if tok.contents != '{'
124 // Expected block start, got garbage
125 {
126 // println!("Expected block start, got {}",tok.contents);
127 // for t in &block
128 // {
129 // print!("{} ", t.contents);
130 // }
131 // exit(1);
132 return None;
133 }
134 }
135
136 let mut escaped_used = false;
137
138 // Scope Start
139 if tok.contents == '{' && !escaped {
140 entering_bracket_count += 1;
141
142 if entering_bracket_count == 3 {
143 scope_count += 1;
144 entering_bracket_count = 0;
145 if !entered {
146 entered = true;
147 }
148 }
149 } else {
150 entering_bracket_count = 0;
151 if escaped {
152 escaped_used = true;
153 }
154 }
155 // Scope End
156 if tok.contents == '}' && !escaped {
157 exiting_bracket_count += 1;
158 if exiting_bracket_count == 3 {
159 scope_count -= 1;
160 entering_bracket_count = 0;
161 }
162 if scope_count == 0 {
163 break;
164 }
165 } else {
166 exiting_bracket_count = 0;
167 if escaped {
168 escaped_used = true;
169 }
170 }
171
172 if escaped_used {
173 escaped = false;
174 block.push(escaped_tok.clone());
175 }
176
177 if tok.contents == '\\' {
178 escaped = true;
179 escaped_tok = tok.clone();
180 } else {
181 block.push(tok.clone());
182 }
183 }
184
185 if scope_count != 0 {
186 return None;
187 }
188
189 // if block.len() == 6
190 // // things get ugly if its empty
191 // {
192 // let mut emptyblock = Vec::new();
193 // emptyblock.push(Token::new(
194 // "".into(),
195 // tokens[0].origin_file,
196 // tokens[0].line_number,
197 // ));
198 // return (emptyblock, tokens_consumed);
199 // }
200 // pop brackets, bad and ugly but idgaf
201 block.drain(..3);
202 block.drain(block.len() - 2..);
203 return Some((block, tokens_consumed));
204}
205
// Detect and evaluate a lua block. Call this when you think it might be one,
// ie '$' is the current token's content.
//
// Expects `tokens` to start with `$` followed by `[`; scans for the matching
// `]` while tracking lua string literals ('...' and "...") and backslash
// escapes so brackets inside strings don't affect the nesting depth. The
// block body is first run through `process_skid` (embedded macros), trimmed,
// then evaluated by the lua interpreter.
//
// Returns `(output_tokens, tokens_consumed)` on success; `None` when this is
// not a well-formed `$[...]` block or the lua code errored (lua errors are
// reported via `error_skid`).
pub fn find_and_execute_lua_block(
    tokens: &[Token],
    proj_context: &mut Project,
    skid_context: &mut SkidContext,
) -> Option<(Vec<Token>, usize)> {
    // Minimum possible block is the three tokens `$`, `[`, `]`.
    if tokens.len() < 3 {
        return None;
    }
    if tokens[0].contents != '$' || tokens[1].contents != '[' {
        return None;
    }

    #[derive(PartialEq)]
    enum QuoteType {
        UnQuoted,
        DoubledQuoutes,
        SingleQuotes,
        // Lua also has [[...]] strings for multiline but we don't need to specially keep track of them
    }

    let mut quoted = QuoteType::UnQuoted;
    let mut escaped = false;
    let mut brackets = 0; // current [..] nesting depth
    let mut consumed: usize = 1; // counts the '$'; the loop counts from '['
    let mut exited_cleanly = false;
    // Look for the end of the $[] block
    for t in &tokens[1..] {
        consumed += 1;

        // Unquoted '[' opens a nesting level (the first is the block's own).
        if quoted == QuoteType::UnQuoted && t.contents == '[' && !escaped {
            brackets += 1;
            continue;
        }

        // Unquoted ']' closes one; reaching depth 0 ends the block.
        if quoted == QuoteType::UnQuoted && t.contents == ']' && !escaped {
            brackets -= 1;
            if brackets == 0 {
                exited_cleanly = true;
                break;
            }
            continue;
        }

        // Enter/leave lua string literals so their brackets are ignored.
        if quoted == QuoteType::UnQuoted && t.contents == '"' && !escaped {
            quoted = QuoteType::DoubledQuoutes;
            continue;
        }

        if quoted == QuoteType::UnQuoted && t.contents == '\'' && !escaped {
            quoted = QuoteType::SingleQuotes;
            continue;
        }

        if quoted == QuoteType::DoubledQuoutes && t.contents == '"' && !escaped {
            quoted = QuoteType::UnQuoted;
            continue;
        }

        if quoted == QuoteType::SingleQuotes && t.contents == '\'' && !escaped {
            quoted = QuoteType::UnQuoted;
            continue;
        }

        // A backslash escapes exactly the next token.
        if t.contents == '\\' && !escaped {
            escaped = true;
            continue;
        }

        if escaped {
            escaped = false;
        }
    }

    if !exited_cleanly {
        return None;
    }

    // Process embedded macros first, then strip surrounding whitespace and
    // flatten the body back into a source string for lua.
    let out = process_skid(&tokens[2..consumed - 1], proj_context, skid_context);
    let trimmed = trim_whitespace_tokens(&out);
    let mut string: String = "".to_string();
    for t in trimmed {
        string.push(t.contents);
    }

    let ret: Result<mlua::Value, mlua::Error> = skid_context.lua.load(string).eval();
    if ret.is_err() {
        // Report the lua error against the '$' token's origin location.
        error_skid(
            proj_context,
            tokens[0].template_origin,
            tokens[0].origin_line,
            &ret.err()?.to_string(),
        );
        return None;
    }

    // A nil result means the block expands to nothing.
    if ret.as_ref().unwrap().is_nil() {
        return Some((Vec::new(), consumed));
    }

    // NOTE(review): `to_string().unwrap()` panics if the lua value cannot be
    // stringified (e.g. a table without __tostring) — consider routing that
    // failure through error_skid as well.
    let mut return_tokens = split_to_tokens(ret.unwrap().to_string().unwrap(), tokens[0].origin_index);
    for t in &mut return_tokens {
        // Mark output as pre-processed so it isn't macro-expanded again.
        t.pre_proccessed = true;
    }
    Some((return_tokens, consumed))
}
314
315// Theres no std function to have the delimiters be their own element in the out vector
316// clean it up a bit here
317pub fn split_keep_delimiters(instr: String) -> Vec<String> {
318 let split: Vec<&str> = instr.split_inclusive(DELIMITERS).collect();
319 let mut output = Vec::new();
320
321 for s in split {
322 if s.ends_with(DELIMITERS) {
323 let (token, ending) = s.split_at(s.len() - 1);
324 if token.len() > 0 {
325 output.push(token.to_string());
326 }
327 output.push(ending.to_string());
328 //println!("({}, {})", token.to_string(), ending.to_string())
329 } else {
330 output.push(s.to_string());
331 }
332 }
333 return output;
334}
335
336pub fn strings_to_tokens(in_strings: Vec<String>, origin_file: usize) -> Vec<Token> {
337 let mut tokens = Vec::new();
338 let mut line_count = 1;
339
340 for str in in_strings {
341 for c in str.chars() {
342 let current_line = line_count;
343 for char in str.chars() {
344 if char == '\n' {
345 line_count += 1;
346 }
347 }
348 let token: Token = Token::new(c, origin_file, current_line);
349 tokens.push(token);
350 }
351 }
352
353 return tokens;
354}
355
356// Need to do some special case stuff so you can macros without spaces between
357// (something like "stuff!insert(..)" is split to ["stuff","!insert(..)"] so it can be acted on later)
358pub fn split_to_tokens(instr: String, origin_file: usize) -> Vec<Token> {
359 let split = split_keep_delimiters(instr);
360 let mut new_split: Vec<String> = Vec::new();
361 for s in split {
362 let prefix_offset = s.find(&['!', '&']);
363 if prefix_offset.is_some() {
364 let (first, second) = s.split_at(prefix_offset.unwrap());
365 //println!("\"{}\", \"{}\"", first, second);
366 if first.len() > 0 {
367 new_split.push(first.to_string());
368 }
369 if second.len() > 0 {
370 new_split.push(second.to_string());
371 }
372 } else {
373 if s.len() > 0 {
374 new_split.push(s);
375 }
376 }
377 //sleep(std::time::Duration::from_millis(10));
378 }
379 return strings_to_tokens(new_split, origin_file);
380}
381
382pub fn next_nonwhitespace_token(tokens: &Vec<Token>, index: usize) -> Option<usize> {
383 while index < tokens.len() {
384 if tokens[index].contents.is_whitespace() {
385 continue;
386 }
387 return Some(index);
388 }
389 return None;
390}
391
392//trim whitespace from the ends
393pub fn trim_whitespace_tokens(tokens: &[Token]) -> &[Token] {
394 if tokens.len() == 0 {
395 return tokens;
396 }
397
398 let mut start: usize = 0;
399 let mut end: usize = tokens.len();
400 for tok in tokens {
401 if !tok.contents.is_whitespace() {
402 break;
403 }
404 start = start + 1;
405 }
406
407 for tok in tokens.iter().rev() {
408 if !tok.contents.is_whitespace() {
409 break;
410 }
411 end = end - 1;
412 }
413
414 return &tokens[start..end];
415}
416
417// Find the first instance of the pattern
418pub fn find_pattern(tokens: &[Token], pat: String) -> Option<(usize, usize)> {
419 // (startpoint, length)
420
421 let split_pattern = split_to_tokens(pat, 0);
422 let mut pattern_index: usize = 0;
423 let mut token_index: usize = 0;
424
425 while token_index < tokens.len() && tokens.len() - token_index >= split_pattern.len() {
426 for t in &tokens[token_index..] {
427 if t.contents == split_pattern[pattern_index].contents {
428 pattern_index += 1;
429 if pattern_index == split_pattern.len() {
430 return Some((token_index, split_pattern.len()));
431 }
432 } else {
433 pattern_index = 0;
434 token_index += 1;
435 break;
436 }
437 }
438 }
439
440 None
441}
442
/// Convenience whitespace queries for strings.
pub trait WhitespaceChecks {
    /// True when every character is whitespace (vacuously true when empty).
    fn is_only_whitespace(&self) -> bool;
    /// True when at least one character is whitespace.
    fn contains_whitespace(&self) -> bool;
}

impl WhitespaceChecks for String {
    fn is_only_whitespace(&self) -> bool {
        self.chars().all(char::is_whitespace)
    }

    fn contains_whitespace(&self) -> bool {
        self.chars().any(char::is_whitespace)
    }
}
467
468pub trait TokenTools {
469 fn trim_whitespace(&mut self) -> &[Token];
470}
471
472impl TokenTools for Vec<Token> {
473 fn trim_whitespace(&mut self) -> &[Token] {
474 return trim_whitespace_tokens(&self[..]);
475 }
476}