-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathstring.lisp
772 lines (716 loc) · 31.5 KB
/
string.lisp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
;;; Copyright 2020 Google LLC
;;;
;;; Use of this source code is governed by an MIT-style
;;; license that can be found in the LICENSE file or at
;;; https://opensource.org/licenses/MIT.
;;; Utilities dealing with strings and characters.
;;; The symbols in this package are designed to be used the package prefix.
;;; Use the ACE.CORE namespace for simple syntax.
;;;
;;; TODO(czak): Unify the libraries across google3/lisp and travel/qpx.
(defpackage #:ace.core.string
(:use #:common-lisp
#:ace.core.defun
#:ace.core.once-only)
(:import-from #:ace.core.check #:dcheck #:check #:expect)
(:import-from #:ace.core.macro
#:with-gensyms
#:gensymp
#:without-code-deletion-notes
#:eval*)
(:import-from #:ace.core.vector #:index #:size #:with-vector)
(:import-from #:ace.core.collect #:with-collected-values)
(:export
#:designator
#:cat
#:prefixp
#:suffixp
#:split
#:tokenize
#:delimit
#:do-tokens
#:search-replace
#:join
#:len
#:emptyp
#:index
#:clear
#:copy
#:hash ; TODO(czak): Implement.
#:read-as-keyword
#:read-as-keyword-error
;; Also exported from the ACE.CORE namespace.
#:end-of-token-p
#:base-char-p
#:whitespacep))
(in-package #:ace.core.string)
(deftype designator () "A type that can be passed to STRING." '(or string symbol character))
(deftype index? () "NULL or INDEX." '(or null index))
(defun check-string (string start end)
"Checks if START and END forms are within the STRING if they all are constant forms."
(cond ((not (stringp string)))
((and (integerp start) (integerp end))
(expect (<= 0 start end (length string))))
((integerp start)
(expect (<= 0 start (length string))))
((integerp end)
(expect (<= 0 end (length string))))))
(defun* prefixp (prefix string &key (start1 0) end1 (start2 0) end2 ignore-case)
(declare (self (string string &key index index? index index? t) boolean))
"True if STRING starts with the PREFIX.
Both strings can be restricted in their range using the START1, END1, START2, or END2 parameters.
It is an error if START1, END1, START2, or END2 are outside of the corresponding string boundaries.
It is an error for START1 > END1 or for START2 > END2.
The errors are not necessarily signaled in optimized builds.
Arguments:
PREFIX - the supposed prefix of STRING.
STRING - the string that is tested for the prefix.
START1 - position on PREFIX to start comparison from.
END1 - final exclusive position on PREFIX to end the comparison at.
START2 - position on STRING to start comparison from.
END2 - final exclusive position on the STRING to end the comparison at.
IGNORE-CASE - if non-nil, the test will be case insensitive.
Related:
cl:string=
cl:search
string:suffixp
sequence:prefixp
alexandria:starts-with-subseq
emacs:string-prefix-p
"
#-opt
(progn
(dcheck (<= 0 start1 (length prefix)))
(when end1 (dcheck (<= start1 end1 (length prefix))))
(dcheck (<= 0 start2 (length string)))
(when end2 (dcheck (<= start2 end2 (length string)))))
(let* ((length (- (or end1 (length prefix)) start1))
(end (+ length start2)))
(declare (type index end length))
(and (<= end (or end2 (length string)))
(if ignore-case
(string-equal prefix string
:start1 start1 :end1 end1
:start2 start2 :end2 end)
(string= prefix string
:start1 start1 :end1 end1
:start2 start2 :end2 end)))))
(define-compiler-macro* prefixp (&whole whole prefix string
&key (start1 `(or ,start1 0)) end1
(start2 `(or ,start2 0)) end2
ignore-case)
(declare (string prefix string)
(index start1 start2)
(index? end1 end2))
(check-string prefix start1 end1)
(check-string string start2 end2)
(typecase ignore-case
(boolean
;; TODO(czak): Make ONCE-ONLY* evaluate each init-form in succession.
;; This cascade of ONCE-ONLY is needed because it evaluates all
;; init-forms in parallel and we need PREFIX and STRING as a value.
(once-only ((end1 `(or ,end1 (length ,prefix)))
(end2 `(or ,end2 (length ,string))))
(declare (index end1 end2))
(once-only ((end `(+ (- ,end1 ,start1) ,start2)))
(declare (index end))
`(and (<= ,end ,end2)
(,(if ignore-case 'string-equal 'string=)
,prefix ,string
:start1 ,start1 :end1 ,end1
:start2 ,start2 :end2 ,end)))))
(t (return whole))))
(defun* suffixp (suffix string &key (start1 0) end1 (start2 0) end2 ignore-case)
(declare (self (string string &key index index? index index? t) boolean))
"True if STRING ends with SUFFIX.
Both strings can be restricted in their range using the START1, END1, START2, or END2 parameters.
It is an error if START1, END1, START2, or END2 are outside of the corresponding string boundaries.
It is an error for START1 > END1 or for START2 > END2.
The errors are not necessarily signaled in optimized builds.
Arguments:
SUFFIX - the supposed suffix of STRING.
STRING - the string that is tested for the suffix.
START1 - position on SUFFIX to start comparison from.
END1 - final exclusive position on SUFFIX to end the comparison at.
START2 - position on the STRING to start comparison from.
END2 - final and exclusive position on the STRING to end the comparison at.
IGNORE-CASE - if non-nil, the test will be case insensitive.
Related:
cl:string=
cl:search
string:prefixp
sequence:suffixp
alexandria:string-ends-with-subseq
emacs:string-suffix-p
"
#-opt
(progn
(dcheck (<= start1 (length suffix)))
(when end1 (dcheck (<= start1 end1 (length suffix))))
(dcheck (<= start2 (length string)))
(when end2 (dcheck (<= start2 end2 (length string)))))
(let* ((length (- (or end1 (length suffix)) start1))
(start (- (or end2 (length string)) length)))
;; START maybe negative.
(declare (type index length) (fixnum start))
(and (>= start start2)
(if ignore-case
(string-equal suffix string
:start1 start1 :end1 end1
:start2 start :end2 end2)
(string= suffix string
:start1 start1 :end1 end1
:start2 start :end2 end2)))))
(define-compiler-macro* suffixp (&whole whole suffix string
&key (start1 `(or ,start1 0)) end1
(start2 `(or ,start2 0)) end2
ignore-case)
(declare (string prefix string)
(index start1 start2)
(index? end1 end2))
(check-string suffix start1 end1)
(check-string string start2 end2)
(typecase ignore-case
(boolean
(once-only ((end1 `(or ,end1 (length ,suffix)))
(end2 `(or ,end2 (length ,string))))
(declare (index end1 end2))
(once-only ((start `(- ,end2 (- ,end1 ,start1))))
(declare (fixnum start))
`(and (>= ,start ,start2)
(,(if ignore-case 'string-equal 'string=)
,suffix ,string
:start1 ,start1 :end1 ,end1
:start2 ,start :end2 ,end2)))))
(t (return whole))))
(defun* base-char-p (character)
(declare (self inline foldable (character) boolean))
"True if the CHARACTER is a base-char."
(typep character 'base-char))
(defun* whitespacep (character)
(declare (self inline foldable (character) (or null character)))
"True if the CHARACTER is a whitespace character."
(when (base-char-p character)
(find
(the base-char character)
#.(coerce '(#\Space #\Backspace #\Tab #\Newline #\Linefeed #\Page #\Return)
'simple-base-string)
:test #'char=)))
(defun* end-of-token-p (char)
(declare (self ((or null character)) boolean))
"True if the CHAR terminates a token. I.e. it is whitespace, a terminating char, or NIL."
(and (or (whitespacep char)
(member char '(nil #\( #\) #\" #\' #\` #\, #\;))) t))
;;;
;;; Tokenization - an exercise in optimization...
;;;
(deftype char-selector ()
"A character, character sequence, or a function used to select characters."
'(or (function (character) t) character sequence symbol))
(deftype char-selector? () "NULL or CHAR-SELECTOR" '(or null char-selector))
;;;
;;; Delimit ...
;;;
(defun* %%delimit-by-fn
(call-back string by start end count keep-rest keep-empty)
;; This is a helper function for DELIMIT and TOKENIZE.
;; It uses a function to find the separators.
;; It is tempting to make this function inline. But it is 6 KB when optimized.
(declare
(self (function simple-string function index index index? t t) index))
(macrolet
((%tok (&key count keep-empty)
`(loop ,@(when count
'(:for token-count :of-type index :from count :above 0))
,@(if keep-empty
'(:for %token-start :of-type index = start
:then (1+ %token-end))
'(:for %token-start :of-type index?
= (position-if-not by string :start start :end end)
:then (position-if-not by string
:start (1+ %token-end) :end end)
:while %token-start))
:for %token-end :of-type index?
= (position-if by string :start %token-start :end end)
:do (funcall call-back %token-start (or %token-end end))
:while %token-end
:finally
,@(when count
`((when (and keep-rest %token-end
(if keep-empty
(< %token-end end)
(< (1+ %token-end) end)))
(funcall call-back (1+ %token-end) end))))
(return (1+ (or %token-end end)))))
(tok ()
`(cond ((null count)
(if keep-empty (%tok :keep-empty t) (%tok)))
((zerop count)
(when (and keep-rest (or keep-empty (< start end)))
(funcall call-back start end))
start)
(keep-empty (%tok :count t :keep-empty t))
(t (%tok :count t)))))
(declare (function by))
(etypecase string
(base-string (tok))
((array character) (tok)))))
(defun* %%delimit-by-char
(call-back string char start end count keep-rest keep-empty)
;; This is a helper function for DELIMIT and TOKENIZE.
;; It uses a character as a separator.
;; It is tempting to make this function inline. But it is 6 KB when optimized.
(declare
(self (function simple-string character index index index? t t) index))
(macrolet
((%tok (&key count keep-empty)
`(loop ,@(when count
'(:for token-count :of-type index :from count :above 0))
,@(if keep-empty
'(:for %token-start :of-type index = start
:then (1+ %token-end))
'(:for %token-start :of-type index?
= (position char string
:start start :end end
:test-not #'char=)
:then (position char string
:start (1+ %token-end) :end end
:test-not #'char=)
:while %token-start))
:for %token-end :of-type index?
= (position char string
:start %token-start :end end
:test #'char=)
:do (funcall call-back %token-start (or %token-end end))
:while %token-end
:finally
,@(when count
`((when (and keep-rest %token-end
(if keep-empty
(< %token-end end)
(< (1+ %token-end) end)))
(funcall call-back (1+ %token-end) end))))
(return (1+ (or %token-end end)))))
(tok ()
`(cond ((null count)
(if keep-empty (%tok :keep-empty t) (%tok)))
((zerop count)
(when (and keep-rest (or keep-empty (< start end)))
(funcall call-back start end))
start)
(keep-empty (%tok :count t :keep-empty t))
(t (%tok :count t)))))
(etypecase string
(base-string (tok))
((array character) (tok)))))
(defun* %%delimit (call-back string by start end count keep-rest keep-empty)
;; This function is a wrapper around %%delimit-by-fn and %%delimit-by-char.
;; It adds dynamic scope functions used the wrapped functions.
(declare (self (function simple-string char-selector? index index index? t t)
index))
(flet ((use (fn)
(%%delimit-by-fn
call-back string fn start end count keep-rest keep-empty))
(lst (c)
(find (the character c) (the list by) :test #'char=))
(simple-str (c)
(find (the character c)
(the (and simple-string (array character)) by)
:test #'char=))
(str (c)
(find (the character c) (the string by) :test #'char=))
(array (c)
(find (the character c) (the (array character) by) :test #'char=))
(seq (c)
(find (the character c) by :test #'char=)))
(declare (dynamic-extent #'use #'simple-str #'str #'array #'seq))
(etypecase by
((or null character)
(%%delimit-by-char
call-back string (or by #\Space) start end count keep-rest keep-empty))
(function (use by))
(symbol (use (symbol-function by)))
(list (use #'lst))
((and simple-string (array character))
(use #'simple-str))
(string (use #'str))
((array character) (use #'array))
(sequence (use #'seq)))))
(define-compiler-macro %%delimit
(&whole whole call-back string by start end count keep-rest keep-empty
&environment env)
(once-only (call-back string by)
(declare (dynamic-extent call-back by) (inline call-back)
(type index start) (type index? end count))
(cond ((typep by '(cons (member function lambda)) env)
`(%%delimit-by-fn ,call-back ,string ,by
,start ,end ,count ,keep-rest ,keep-empty))
((constantp by env)
(let ((by! (eval* by env)))
(etypecase by!
((or null character)
`(%%delimit-by-char ,call-back ,string ,(or by! #\Space)
,start ,end ,count ,keep-rest ,keep-empty))
(list
`(flet ((by (char) (find char (the list ,by) :test #'char=)))
(declare (dynamic-extent #'by))
(%%delimit-by-fn ,call-back ,string #'by
,start ,end ,count ,keep-rest ,keep-empty)))
(sequence
`(flet ((by (char) (find char ,by :test #'char=)))
(declare (dynamic-extent #'by))
(%%delimit-by-fn ,call-back ,string #'by
,start ,end ,count ,keep-rest ,keep-empty)))
(symbol
`(%%delimit-by-fn ,call-back ,string (coerce ,by 'function)
,start ,end ,count ,keep-rest ,keep-empty)))))
(t
(return whole)))))
(defun* delimit (call-back string
&key (by #\Space) (start 0) end
count keep-rest keep-empty)
"Tokenize the STRING, splitting it at characters selected using BY.
Returns one plus the last scanned index while tokenizing.
Parameters:
CALL-BACK - a function receives each token as a pair of delimiting indexes.
BY - a predicate that returns true for each delimiter.
START - is the starting position on the string. Default: 0.
END - is the end position on the string. Default: nil.
COUNT - the maximal count of tokens to tokenize.
KEEP-REST - if true, the rest of the string is returned as the last token.
KEEP-EMPTY - if true, empty tokens delimited using BY will be returned.
"
(declare (self ((function (index index) *)
(or null string)
&key
(:by char-selector)
(:start index)
(:end index?)
(:count index?)
(:keep-rest t)
(:keep-empty t))
index))
(etypecase string
(null start)
(simple-string
(%%delimit call-back string by
start (or end (length string))
count keep-rest keep-empty))
(string
(with-vector ((%string string) (%start start) (%end end) :force-inline t)
(declare (optimize #+sbcl (sb-c::insert-array-bounds-checks 0)))
(let ((offset (- %start start)))
(declare (index offset))
(if (zerop offset)
(%%delimit call-back %string by
%start %end count keep-rest keep-empty)
(- (flet ((%call-back (%token-start %token-end)
(declare (index %token-start %token-end))
(funcall call-back
(- %token-start offset)
(- %token-end offset))))
(declare (inline %call-back) (dynamic-extent #'%call-back))
(%%delimit #'%call-back %string by
%start %end count keep-rest keep-empty))
offset)))))))
(define-compiler-macro* delimit
(call-back string
&key by (start `(or ,start 0)) end count keep-rest keep-empty)
(declare (function call-back) (inline call-back)
(dynamic-extent call-back by)
(type index start) (type index? end count))
`(without-code-deletion-notes ;; make the compiler pay for it.
(etypecase ,string
(null ,start)
(simple-string
(%%delimit ,call-back ,string ,by
,start (or ,end (length ,string))
,count ,keep-rest ,keep-empty))
(string
(with-vector ((%string ,string) (%start ,start) (%end ,end)
:force-inline t)
(declare (optimize #+sbcl (sb-c::insert-array-bounds-checks 0)))
(let ((offset (- %start ,start)))
(declare (index offset))
(if (zerop offset)
(%%delimit ,call-back %string ,by %start %end
,count ,keep-rest ,keep-empty)
(- (flet ((%call-back (%token-start %token-end)
(declare (index %token-start %token-end))
(funcall ,call-back
(- %token-start offset)
(- %token-end offset))))
(declare (inline %call-back)
(dynamic-extent #'%call-back))
(%%delimit #'%call-back %string ,by
%start %end ,count ,keep-rest ,keep-empty))
offset))))))))
;;;
;;; Tokenize ...
;;;
(defun* tokenize
(call-back string
&key (by #\Space) (start 0) end count keep-rest keep-empty)
"Tokenize the STRING, splitting it at characters using the BY char selector.
Returns one plus the last scanned index while tokenizing.
Parameters:
CALL-BACK - a function receives each token as a simple-string.
BY - a null or a character-selector.
START - is the starting position on the string. Default: 0.
END - is the end position on the string. Default: nil.
COUNT - the maximal count of tokens to tokenize.
KEEP-REST - if true, the rest of the string is returned as the last token.
KEEP-EMPTY - if true, empty tokens delimited using BY will be returned.
"
(declare (self ((function (string) *)
(or null string)
&key
(:by char-selector)
(:start index)
(:end index?)
(:count index?)
(:keep-rest t)
(:keep-empty t))
index))
(if string
(with-vector ((%string string) (%start start) (%end end) :force-inline t)
(declare (optimize #+sbcl (sb-c::insert-array-bounds-checks 0)))
(let ((offset (- %start start)))
(declare (index offset))
(flet ((token (%token-start %token-end)
(declare (index %token-start %token-end))
(funcall call-back (subseq (the string %string)
%token-start %token-end))))
(declare (inline token) (dynamic-extent #'token))
(- (%%delimit #'token %string by
%start %end count keep-rest keep-empty)
offset))))
start))
(define-compiler-macro* tokenize
(call-back string
&key by (start `(or ,start 0)) end count keep-rest keep-empty)
(declare (function call-back) (inline call-back)
(dynamic-extent call-back by)
(type index start) (type index? end count))
`(without-code-deletion-notes ;; make the compiler pay for it.
(if ,string
(with-vector ((%string ,string) (%start ,start) (%end ,end)
:force-inline t)
(declare (optimize #+sbcl (sb-c::insert-array-bounds-checks 0)))
(flet ((%token (%token-start %token-end)
(declare (index %token-start %token-end))
(funcall ,call-back
(subseq %string %token-start %token-end))))
(declare (inline %token) (dynamic-extent #'%token))
(- (%%delimit #'%token %string ,by
%start %end ,count ,keep-rest ,keep-empty)
(the index (- %start ,start)))))
,start)))
;;;
;;; DO-TOKENS
;;;
(defmacro do-tokens ((token string
&key (by #\Space) (start 0) end
count keep-empty keep-rest)
&body body)
"Tokenize the STRING using a char selector BY as a separator.
Returns one plus the last scanned index while tokenizing.
Parameters:
TOKEN - an atom or a list: (start end) that is bound to the token or to
the starting and ending indexes on the string respectively.
E.g. to collect the tokens use:
(do-tokens (token string) (push token tokens))
E.g. to capitalize every word between hyphens:
(do-tokens ((start-var end-var) string :by #\-)
(nstring-capitalize string :start start-var :end end-var))
STRING - is the input string to be split into tokens.
BY - a function, a character, or a multiset sequence of characters.
START - the start position on the STRING. Default: 0.
END - the end position on the STRING. NIL means the end of the STRING.
COUNT - the maximal count of tokens to tokenize.
KEEP-REST - if true, the rest of the string is returned as the last token.
KEEP-EMPTY - if true, empty tokens delimited using BY will be returned.
"
(check-type token (or symbol (cons symbol (cons symbol null))))
`(block nil
(flet ((&body ,(if (atom token) (list token) token) ,@body))
(declare (dynamic-extent #'&body) (inline &body))
(,(if (atom token) 'tokenize 'delimit)
#'&body ,string
,@(unless (eql by #\Space) `(:by ,by))
,@(unless (eql start 0) `(:start (or ,start 0)))
,@(when end `(:end ,end))
,@(when count `(:count ,count))
,@(when keep-empty `(:keep-empty ,keep-empty))
,@(when keep-rest `(:keep-rest ,keep-rest))))))
;;;
;;; SPLIT
;;;
(defun* split (string &key (by #\Space) (start 0) end
count keep-rest keep-empty)
"Return tokens from the STRING by splitting it using the BY delimiter.
The string is split at the CHAR characters or at ones that pass the TEST.
START and END demarcate the starting and final position on the string.
Empty strings are not returned in the list unless SKIP-EMPTY is NIL.
STRING can be NIL which results in NIL.
It is an error for START or END to be outside the STRING boundaries.
It is an error for START > END.
The errors are not necessarily signaled in optimized builds.
Parameters:
STRING - the string to be split. STRING = nil returns nil.
BY - a function, a character, or a multiset sequence of characters.
START - the first postion on the string to return values for.
END - the final exclusive position on the string to return values for.
COUNT - the maximal count of tokens to return (plus maybe the rest).
KEEP-REST - if true, the rest of the string is returned as the last token.
KEEP-EMPTY - if true, empty tokens delimited using BY will be returned.
"
(declare (self ((or null string)
&key
(:by char-selector)
(:start index)
(:end index?)
(:count index?)
(:keep-rest t)
(:keep-empty t))
list))
(with-collected-values (collect)
(declare (dynamic-extent #'collect))
(tokenize #'collect string
:by by :start start :end end
:count count :keep-rest keep-rest :keep-empty keep-empty)))
(define-compiler-macro* split
(string &key by start end count keep-rest keep-empty)
(declare (type (or null string) string) (dynamic-extent by))
`(when ,string
(with-collected-values (collect)
(declare (dynamic-extent #'collect))
(tokenize #'collect ,string
,@(unless (eql by #\Space) `(:by ,by))
,@(unless (eql start 0) `(:start (or ,start 0)))
,@(when end `(:end ,end))
,@(when count `(:count ,count))
,@(when keep-rest `(:keep-rest ,keep-rest))
,@(when keep-empty `(:keep-empty ,keep-empty))))))
;;;
;;; Etc.
;;;
(defun* cat (&rest things)
"Concatenate the THINGS as a string."
(declare (self foldable (&rest t) simple-string) (dynamic-extent things))
(apply #'concatenate 'simple-string (mapcar #'string things)))
(define-compiler-macro cat (&rest things)
`(concatenate 'simple-string ,@(mapcar (lambda (x) `(string ,x)) things)))
(defun* join (separator strings)
"Concatenate STRINGS using the SEPARATOR which can be a string, a char, or a symbol."
(declare (self foldable inline (designator list) simple-string))
(if strings
(let ((separator (string separator)))
(apply #'concatenate 'simple-string (pop strings)
(loop for part in strings
collect separator
collect part)))
""))
(defun* len (string)
"Return STRING length."
(declare (self foldable inline (string) size))
(length string))
(defun* emptyp (string)
"True if STRING is empty."
(declare (self foldable inline (string) boolean))
(zerop (the size (length string))))
(defun* search-replace (old new string &key (start 0) (end nil) (test #'char=))
"Searches for the occurrences of OLD and replaces them with NEW in the STRING.
The search takes place between START and END. The characters are compared using TEST.
If OLD is not found in STRING, the STRING is returned.
Otherwise, a new copy of STRING is returned where OLD is replaced with NEW."
(declare (self foldable (string string string &key index index? function) simple-string))
(with-vector ((old old) (start1 0) (end1 nil))
(with-vector ((string string) (start2 start) (end2 end))
(let ((index0 (search old string :test test
:start1 start1 :end1 end1
:start2 start2 :end2 end2)))
(declare (type index? index0))
(if index0
(apply
;; CONCATENATE seems faster than WITH-OUTPUT-TO-STREAM or FORMAT.
#'concatenate 'string
(subseq string start index0)
new
(loop with skip of-type index = (length old)
for start2 of-type index = (+ index0 skip) then (+ index skip)
for index = (search old string :test test
:start1 start1 :end1 end1
:start2 start2 :end2 end2)
collect (subseq string start2 index)
when index
collect new
while index))
string)))))
;;;
;;; TODO(czak): Remove READ-AS-KEYWORD.
;;;
(define-condition read-as-keyword-error (error)
((string :initarg :string :reader read-as-keyword-error-string))
(:report (lambda (error out)
(format out "Cannot convert ~S to a keyword!"
(read-as-keyword-error-string error))))
(:documentation
"An error that signals that a STRING could not be converted to a keyword."))
(defun* read-as-keyword (string)
"Accepts a STRING and returns a keyword with the STRING read in the default reader case.
The string should not have a colon character #\: at any other than the first position or
two colon characters '::' at any other than the first two positions.
The string should not have a sharp sign character #\# at the first position nor
it should have any whitespace nor it should contain any token terminating characters.
In case there are misplaced colon or sharp sign characters no value is read or interned.
Also the string should read to a keyword as the function uses READ-FROM-STRING.
Argument:
string - a string or character for the string of the keyword.
Signals:
string:read-as-keyword-error - in case the STRING cannot be read to a keyword.
Examples:
(string:read-as-keyword \"tEsT\") => :TEST
Related:
alexandria:make-keyword
iterate:keywordize
asdf:keywordize
cl-protobufs:keywordify
qpx:keywordify"
(declare (self (string) keyword))
(let ((pos (position #\: string :from-end t :test #'char=)))
(when (or (and pos (or (> pos 1) (char/= #\: (char string 0))))
(and (plusp (length string)) (char= (char string 0) #\#))
(position-if #'end-of-token-p string))
(error 'read-as-keyword-error :string string)))
(let* ((*package* (find-package "KEYWORD"))
(result (read-from-string string)))
(unless (typep result 'keyword)
(error 'read-as-keyword-error :string string))
result))
(defun* clear (string &key purge)
"Clears a STRING destructively. Returns the CLEARED string or the empty string with PURGE."
(declare (self inline (string &key boolean) string))
(if (or purge (zerop (length string)))
""
(adjust-array string 0)))
(defun* copy (string &key into)
"Copies the STRING. If destination INTO is provided,
it is attempted to copy the string into the destination.
Returns the INTO destination or a new string."
;; The idea here is to reuse as much of the destinations's memory as possible before creating
;; a copy of the string in a new place.
(declare (self (string &key (or null string)) string))
(cond ((or (not into)
(not (subtypep (array-element-type into) (array-element-type string))))
(copy-seq string))
((eq string into)
string)
((= (length string) (length into))
(replace into string))
((and (array-has-fill-pointer-p into)
(<= (length string) (array-dimension into 0)))
(setf (fill-pointer into) (length string))
(replace into string))
(t
(copy-seq string))))