Skip to content

Commit

Permalink
Add micro-blog exercise (#87)
Browse files Browse the repository at this point in the history
  • Loading branch information
keiravillekode authored Oct 25, 2024
1 parent 6a88603 commit d03a71e
Show file tree
Hide file tree
Showing 12 changed files with 3,613 additions and 0 deletions.
8 changes: 8 additions & 0 deletions config.json
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,14 @@
"prerequisites": [],
"difficulty": 3
},
{
"slug": "micro-blog",
"name": "Micro Blog",
"uuid": "3da09ee9-3f72-4c96-9b07-d1c82566cf65",
"practices": [],
"prerequisites": [],
"difficulty": 3
},
{
"slug": "pangram",
"name": "Pangram",
Expand Down
37 changes: 37 additions & 0 deletions exercises/practice/micro-blog/.docs/instructions.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Instructions

You have identified a gap in the social media market for very, very short posts.
Now that Twitter allows 280 character posts, people wanting quick social media updates aren't being served.
You decide to create your own social media network.

To make your product noteworthy, you make it extreme and only allow posts of 5 or fewer characters.
Any posts of more than 5 characters should be truncated to 5.

To allow your users to express themselves fully, you allow Emoji and other Unicode.

The task is to truncate input strings to 5 characters.

## Text Encodings

Text stored digitally has to be converted to a series of bytes.
There are 3 ways to map characters to bytes in common use.

- **ASCII** can encode English language characters.
All characters are precisely 1 byte long.
- **UTF-8** is a Unicode text encoding.
Characters take between 1 and 4 bytes.
- **UTF-16** is a Unicode text encoding.
Characters are either 2 or 4 bytes long.

UTF-8 and UTF-16 are both Unicode encodings which means they're capable of representing a massive range of characters including:

- Text in most of the world's languages and scripts
- Historic text
- Emoji

UTF-8 and UTF-16 are both variable length encodings, which means that different characters take up different amounts of space.

Consider the letter 'a' and the emoji '😛'.
In UTF-16 the letter takes 2 bytes but the emoji takes 4 bytes.

The trick to this exercise is to use APIs designed around Unicode characters (code points) instead of Unicode code units.
17 changes: 17 additions & 0 deletions exercises/practice/micro-blog/.meta/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{
"authors": [
"keiravillekode"
],
"files": {
"solution": [
"micro_blog.s"
],
"test": [
"micro_blog_test.c"
],
"example": [
".meta/example.s"
]
},
"blurb": "Given an input string, truncate it to 5 characters."
}
23 changes: 23 additions & 0 deletions exercises/practice/micro-blog/.meta/example.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
.text
.globl truncate

/*
 * extern void truncate(char *buffer, const char *phrase);
 *
 * Copies phrase into buffer byte by byte, stopping at the NUL terminator or
 * at the first byte of the 6th code point, whichever comes first, and leaves
 * buffer NUL-terminated.
 *
 * x0 = buffer (destination), x1 = phrase (source), following the argument
 *      order of the prototype above under the AArch64 calling convention.
 * Both pointers are advanced as bytes are copied; x2-x4 and the condition
 * flags are overwritten.
 *
 * NOTE(review): assumes phrase is well-formed, NUL-terminated UTF-8 and that
 * buffer has room for every copied byte - confirm against callers.
 */
truncate:
mov x2, #6 /* lead bytes allowed: 5 code points to keep, plus the 6th that ends the copy */

.read:
ldrb w3, [x1], #1 /* load byte, post-increment */
strb w3, [x0], #1 /* store byte, post-increment */
cbz w3, .return /* null terminator */

and w4, w3, #0xC0 /* isolate the top two bits of the byte */
cmp w4, #0x80 /* top bits 10 mark a continuation byte */
beq .read /* non-initial byte of code point */

sub x2, x2, #1 /* this byte began a new code point */
cbnz x2, .read
/* start of the 6th code point */

.return:
/*
 * Reached either after copying the terminator (the store below is then
 * redundant) or after copying the first byte of the 6th code point, which
 * must be replaced by the terminator.
 */
strb wzr, [x0, #-1] /* overwrite most recent byte with '\0' */
ret
46 changes: 46 additions & 0 deletions exercises/practice/micro-blog/.meta/tests.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# This is an auto-generated file.
#
# Regenerating this file via `configlet sync` will:
# - Recreate every `description` key/value pair
# - Recreate every `reimplements` key/value pair, where they exist in problem-specifications
# - Remove any `include = true` key/value pair (an omitted `include` key implies inclusion)
# - Preserve any other key/value pair
#
# As user-added comments (using the # character) will be removed when this file
# is regenerated, comments can be added via a `comment` key.

[b927b57f-7c98-42fd-8f33-fae091dc1efc]
description = "English language short"

[a3fcdc5b-0ed4-4f49-80f5-b1a293eac2a0]
description = "English language long"

[01910864-8e15-4007-9c7c-ac956c686e60]
description = "German language short (broth)"

[f263e488-aefb-478f-a671-b6ba99722543]
description = "German language long (bear carpet → beards)"

[0916e8f1-41d7-4402-a110-b08aa000342c]
description = "Bulgarian language short (good)"

[bed6b89c-03df-4154-98e6-a61a74f61b7d]
description = "Greek language short (health)"

[485a6a70-2edb-424d-b999-5529dbc8e002]
description = "Maths short"

[8b4b7b51-8f48-4fbe-964e-6e4e6438be28]
description = "Maths long"

[71f4a192-0566-4402-a512-fe12878be523]
description = "English and emoji short"

[6f0f71f3-9806-4759-a844-fa182f7bc203]
description = "Emoji short"

[ce71fb92-5214-46d0-a7f8-d5ba56b4cc6e]
description = "Emoji long"

[5dee98d2-d56e-468a-a1f2-121c3f7c5a0b]
description = "Royal Flush?"
36 changes: 36 additions & 0 deletions exercises/practice/micro-blog/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Cross toolchain: assembler and C compiler for AArch64 Linux.
AS = aarch64-linux-gnu-as
CC = aarch64-linux-gnu-gcc

# CFLAGS and LDFLAGS are folded into ALL_CFLAGS and ALL_LDFLAGS below.
CFLAGS = -g -Wall -Wextra -pedantic -Werror
LDFLAGS =

ALL_LDFLAGS = -pie -Wl,--fatal-warnings

ALL_CFLAGS = -std=c99 -fPIE $(CFLAGS)
ALL_LDFLAGS += $(LDFLAGS)

# One object per .c and .s file in this directory, plus the vendored Unity
# object; example.o is filtered out of the link.
C_OBJS = $(patsubst %.c,%.o,$(wildcard *.c))
AS_OBJS = $(patsubst %.s,%.o,$(wildcard *.s))
ALL_OBJS = $(filter-out example.o,$(C_OBJS) $(AS_OBJS) vendor/unity.o)

# Shared compile command used by the C pattern rule and the Unity rule.
CC_CMD = $(CC) $(ALL_CFLAGS) -c -o $@ $<

# Default target: build the test binary, then run it under qemu-aarch64.
all: tests
qemu-aarch64 -L /usr/aarch64-linux-gnu ./$<

# Link every object into the "tests" executable.
tests: $(ALL_OBJS)
@$(CC) $(ALL_CFLAGS) $(ALL_LDFLAGS) -o $@ $(ALL_OBJS)

# Assemble each .s source.
%.o: %.s
@$(AS) -o $@ $<

# Compile each .c source.
%.o: %.c
@$(CC_CMD)

# Unity is rebuilt when its source or either of its headers changes.
vendor/unity.o: vendor/unity.c vendor/unity.h vendor/unity_internals.h
@$(CC_CMD)

# Remove all objects and the test binary.
clean:
@rm -f *.o vendor/*.o tests

.PHONY: all clean
5 changes: 5 additions & 0 deletions exercises/practice/micro-blog/micro_blog.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
.text
.globl truncate

/* extern void truncate(char *buffer, const char *phrase); */
truncate:
ret /* placeholder: returns immediately without writing anything */
123 changes: 123 additions & 0 deletions exercises/practice/micro-blog/micro_blog_test.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
#include "vendor/unity.h"

#define BUFFER_SIZE 40

extern void truncate(char *buffer, const char *phrase);

/* Unity setUp hook; intentionally empty because no test here needs a fixture. */
void setUp(void) {
}

/* Unity tearDown hook; intentionally empty because no test here allocates anything. */
void tearDown(void) {
}

/* A two-letter ASCII phrase is expected back unchanged. */
void test_english_language_short(void) {
    static const char phrase[] = "Hi";
    static const char expected[] = "Hi";
    char actual[BUFFER_SIZE];

    truncate(actual, phrase);

    TEST_ASSERT_EQUAL_STRING(expected, actual);
}

/* An ASCII phrase longer than five characters is cut to its first five. */
void test_english_language_long(void) {
    TEST_IGNORE();
    static const char phrase[] = "Hello there";
    static const char expected[] = "Hello";
    char actual[BUFFER_SIZE];

    truncate(actual, phrase);

    TEST_ASSERT_EQUAL_STRING(expected, actual);
}

/* A five-character word containing a non-ASCII letter is expected back unchanged. */
void test_german_language_short_broth(void) {
    TEST_IGNORE();
    static const char phrase[] = "brühe";
    static const char expected[] = "brühe";
    char actual[BUFFER_SIZE];

    truncate(actual, phrase);

    TEST_ASSERT_EQUAL_STRING(expected, actual);
}

/* A longer word with a non-ASCII letter is cut after its fifth character. */
void test_german_language_long_bear_carpet__beards(void) {
    TEST_IGNORE();
    static const char phrase[] = "Bärteppich";
    static const char expected[] = "Bärte";
    char actual[BUFFER_SIZE];

    truncate(actual, phrase);

    TEST_ASSERT_EQUAL_STRING(expected, actual);
}

/* A five-letter Cyrillic word is expected back unchanged. */
void test_bulgarian_language_short_good(void) {
    TEST_IGNORE();
    static const char phrase[] = "Добър";
    static const char expected[] = "Добър";
    char actual[BUFFER_SIZE];

    truncate(actual, phrase);

    TEST_ASSERT_EQUAL_STRING(expected, actual);
}

/* A five-letter Greek word is expected back unchanged. */
void test_greek_language_short_health(void) {
    TEST_IGNORE();
    static const char phrase[] = "υγειά";
    static const char expected[] = "υγειά";
    char actual[BUFFER_SIZE];

    truncate(actual, phrase);

    TEST_ASSERT_EQUAL_STRING(expected, actual);
}

/* A five-character formula mixing ASCII and non-ASCII symbols is expected back unchanged. */
void test_maths_short(void) {
    TEST_IGNORE();
    static const char phrase[] = "a=πr²";
    static const char expected[] = "a=πr²";
    char actual[BUFFER_SIZE];

    truncate(actual, phrase);

    TEST_ASSERT_EQUAL_STRING(expected, actual);
}

/* A long run of mathematical symbols is cut after the fifth symbol. */
void test_maths_long(void) {
    TEST_IGNORE();
    static const char phrase[] = "∅⊊ℕ⊊ℤ⊊ℚ⊊ℝ⊊ℂ";
    static const char expected[] = "∅⊊ℕ⊊ℤ";
    char actual[BUFFER_SIZE];

    truncate(actual, phrase);

    TEST_ASSERT_EQUAL_STRING(expected, actual);
}

/* Four ASCII characters followed by one emoji are expected back unchanged. */
void test_english_and_emoji_short(void) {
    TEST_IGNORE();
    static const char phrase[] = "Fly 🛫";
    static const char expected[] = "Fly 🛫";
    char actual[BUFFER_SIZE];

    truncate(actual, phrase);

    TEST_ASSERT_EQUAL_STRING(expected, actual);
}

/* A single emoji is expected back unchanged. */
void test_emoji_short(void) {
    TEST_IGNORE();
    static const char phrase[] = "💇";
    static const char expected[] = "💇";
    char actual[BUFFER_SIZE];

    truncate(actual, phrase);

    TEST_ASSERT_EQUAL_STRING(expected, actual);
}

/* Seven emoji are cut down to the first five. */
void test_emoji_long(void) {
    TEST_IGNORE();
    static const char phrase[] = "❄🌡🤧🤒🏥🕰😀";
    static const char expected[] = "❄🌡🤧🤒🏥";
    char actual[BUFFER_SIZE];

    truncate(actual, phrase);

    TEST_ASSERT_EQUAL_STRING(expected, actual);
}

/* Seven playing-card symbols are cut down to the first five. */
void test_royal_flush(void) {
    TEST_IGNORE();
    static const char phrase[] = "🃎🂸🃅🃋🃍🃁🃊";
    static const char expected[] = "🃎🂸🃅🃋🃍";
    char actual[BUFFER_SIZE];

    truncate(actual, phrase);

    TEST_ASSERT_EQUAL_STRING(expected, actual);
}

/*
 * Test runner entry point: registers every test with Unity in the order
 * listed and returns the value of UNITY_END() as the process exit status.
 */
int main(void) {
UNITY_BEGIN();
/* Each test is passed by name to RUN_TEST, one call per test function. */
RUN_TEST(test_english_language_short);
RUN_TEST(test_english_language_long);
RUN_TEST(test_german_language_short_broth);
RUN_TEST(test_german_language_long_bear_carpet__beards);
RUN_TEST(test_bulgarian_language_short_good);
RUN_TEST(test_greek_language_short_health);
RUN_TEST(test_maths_short);
RUN_TEST(test_maths_long);
RUN_TEST(test_english_and_emoji_short);
RUN_TEST(test_emoji_short);
RUN_TEST(test_emoji_long);
RUN_TEST(test_royal_flush);
return UNITY_END();
}
Loading

0 comments on commit d03a71e

Please sign in to comment.