X-Git-Url: https://ocean-lang.org/code/?a=blobdiff_plain;ds=sidebyside;f=csrc%2Foceani.mdc;h=18e593065705ea6324a6ddf68250142df4ec3d72;hb=19148dce964ae988dab3981c8d7365576fb4767b;hp=520212be6d144365a586638bf695098689460058;hpb=527a8f053f3ee881dad97a5c78e17d9c62862bad;p=ocean diff --git a/csrc/oceani.mdc b/csrc/oceani.mdc index 520212b..18e5930 100644 --- a/csrc/oceani.mdc +++ b/csrc/oceani.mdc @@ -287,14 +287,6 @@ consistent across all the branches. When the variable is not used outside the if, the variables in the different branches are distinct and can be of different types. -Determining the types of all variables early is important for -processing command line arguments. These can be assigned to any of -several types of variable, but we must first know the correct type so -any required conversion can happen. If a variable is associated with -a command line argument but no type can be interpreted (e.g. the -variable is only ever used in a `print` statement), then the type is -set to 'string'. - Undeclared names may only appear in "use" statements and "case" expressions. These names are given a type of "label" and a unique value. This allows them to fill the role of a name in an enumerated type, which @@ -407,11 +399,11 @@ various entities. ### Types Values come in a wide range of types, with more likely to be added. -Each type needs to be able to parse and print its own values (for -convenience at least) as well as to compare two values, at least for -equality and possibly for order. For now, values might need to be -duplicated and freed, though eventually such manipulations will be -better integrated into the language. +Each type needs to be able to print its own values (for convenience at +least) as well as to compare two values, at least for equality and +possibly for order. For now, values might need to be duplicated and +freed, though eventually such manipulations will be better integrated +into the language. Rather than requiring every numeric type to support all numeric operations (add, multiple, etc), we allow types to be able to present @@ -437,7 +429,7 @@ Named type are stored in a simple linked list. Objects of each type are struct type *next; int size, align; void (*init)(struct type *type, struct value *val); - int (*parse)(struct type *type, char *str, struct value *val); + void (*prepare_type)(struct type *type); void (*print)(struct type *type, struct value *val); void (*print_type)(struct type *type, FILE *f); int (*cmp_order)(struct type *t1, struct type *t2, @@ -541,20 +533,13 @@ Named type are stored in a simple linked list. Objects of each type are printf("*Unknown*"); // NOTEST } - static int parse_value(struct type *type, char *arg, - struct value *val) - { - if (type && type->parse) - return type->parse(type, arg, val); - return 0; // NOTEST - } - static struct value *val_alloc(struct type *t, struct value *init) { struct value *ret; - if (!t->size) - val_init(t, NULL); + if (t->prepare_type) + t->prepare_type(t); + ret = calloc(1, t->size); if (init) memcpy(ret, init, t->size); @@ -574,7 +559,6 @@ Named type are stored in a simple linked list. Objects of each type are static int value_cmp(struct type *tl, struct type *tr, struct value *left, struct value *right); static void print_value(struct type *type, struct value *v); - static int parse_value(struct type *type, char *arg, struct value *val); ###### free context types @@ -627,9 +611,6 @@ A separate function encoding these cases will simplify some code later. return require == have; } -When assigning command line arguments to variables, we need to be able -to parse each type from a string. - ###### includes #include #include "parse_string.h" @@ -747,57 +728,10 @@ to parse each type from a string. } } - static int _parse_value(struct type *type, char *arg, struct value *val) - { - struct text tx; - int neg = 0; - char tail[3] = ""; - - switch(type->vtype) { - case Vlabel: // NOTEST - case Vnone: // NOTEST - return 0; // NOTEST - case Vstr: - val->str.len = strlen(arg); - val->str.txt = malloc(val->str.len); - memcpy(val->str.txt, arg, val->str.len); - break; - case Vnum: - if (*arg == '-') { - neg = 1; - arg++; - } - tx.txt = arg; tx.len = strlen(tx.txt); - if (number_parse(val->num, tail, tx) == 0) - mpq_init(val->num); - else if (neg) - mpq_neg(val->num, val->num); - if (tail[0]) { - printf("Unsupported suffix: %s\n", arg); - return 0; - } - break; - case Vbool: - if (strcasecmp(arg, "true") == 0 || - strcmp(arg, "1") == 0) - val->bool = 1; - else if (strcasecmp(arg, "false") == 0 || - strcmp(arg, "0") == 0) - val->bool = 0; - else { - printf("Bad bool: %s\n", arg); - return 0; - } - break; - } - return 1; - } - static void _free_value(struct type *type, struct value *v); static struct type base_prototype = { .init = _val_init, - .parse = _parse_value, .print = _print_value, .cmp_order = _value_cmp, .cmp_eq = _value_cmp, @@ -1505,8 +1439,10 @@ in `rval`. { struct lrval ret = _interp_exec(e); - if (typeret) + if (ret.lval) *typeret = ret.type; + else + free_value(ret.type, &ret.rval); return ret.lval; } @@ -1553,10 +1489,6 @@ different phases of parse, analyse, print, interpret. Thus far we have arrays and structs. -Some complex types need do not exist in a name table, so they are kept -on a linked list in the context (`anon_typelist`). This allows them to -be freed when parsing is complete. - #### Arrays Arrays can be declared by giving a size and a type, as `[size]type' so @@ -1579,25 +1511,27 @@ make a copy of an array with controllable recursive depth. struct type *member; } array; -###### value union fields - void *array; - ###### value functions - static void array_init(struct type *type, struct value *val) + static void array_prepare_type(struct type *type) { - int i; + mpz_t q; + if (!type->array.vsize) + return; + + mpz_init(q); + mpz_tdiv_q(q, mpq_numref(type->array.vsize->val->num), + mpq_denref(type->array.vsize->val->num)); + type->array.size = mpz_get_si(q); + mpz_clear(q); - if (type->array.vsize) { - mpz_t q; - mpz_init(q); - mpz_tdiv_q(q, mpq_numref(type->array.vsize->val->num), - mpq_denref(type->array.vsize->val->num)); - type->array.size = mpz_get_si(q); - mpz_clear(q); - } type->size = type->array.size * type->array.member->size; type->align = type->array.member->align; + } + + static void array_init(struct type *type, struct value *val) + { + int i; if (!val) return; @@ -1645,27 +1579,32 @@ make a copy of an array with controllable recursive depth. static struct type array_prototype = { .init = array_init, + .prepare_type = array_prepare_type, .print_type = array_print_type, .compat = array_compat, .free = array_free, }; +###### declare terminals + $TERM [ ] + ###### type grammar - | [ NUMBER ] Type ${ - $0 = calloc(1, sizeof(struct type)); - *($0) = array_prototype; - $0->array.member = $<4; - $0->array.vsize = NULL; - { + | [ NUMBER ] Type ${ { char tail[3]; mpq_t num; + struct text noname = { "", 0 }; + struct type *t; + + $0 = t = add_type(c, noname, &array_prototype); + t->array.member = $<4; + t->array.vsize = NULL; if (number_parse(num, tail, $2.txt) == 0) tok_err(c, "error: unrecognised number", &$2); else if (tail[0]) tok_err(c, "error: unsupported number suffix", &$2); else { - $0->array.size = mpz_get_ui(mpq_numref(num)); + t->array.size = mpz_get_ui(mpq_numref(num)); if (mpz_cmp_ui(mpq_denref(num), 1) != 0) { tok_err(c, "error: array size must be an integer", &$2); @@ -1674,41 +1613,25 @@ make a copy of an array with controllable recursive depth. &$2); mpq_clear(num); } - $0->next = c->anon_typelist; - c->anon_typelist = $0; - } - }$ + t->size = t->array.size * t->array.member->size; + t->align = t->array.member->align; + } }$ | [ IDENTIFIER ] Type ${ { struct variable *v = var_ref(c, $2.txt); + struct text noname = { "", 0 }; if (!v) tok_err(c, "error: name undeclared", &$2); else if (!v->constant) tok_err(c, "error: array size must be a constant", &$2); - $0 = calloc(1, sizeof(struct type)); - *($0) = array_prototype; + $0 = add_type(c, noname, &array_prototype); $0->array.member = $<4; $0->array.size = 0; $0->array.vsize = v; - $0->next = c->anon_typelist; - c->anon_typelist = $0; } }$ -###### parse context - - struct type *anon_typelist; - -###### free context types - - while (context.anon_typelist) { - struct type *t = context.anon_typelist; - - context.anon_typelist = t->next; - free(t); - } - ###### Binode types Index, @@ -1886,6 +1809,9 @@ function will be needed. free(e); break; +###### declare terminals + $TERM struct . + ###### variable grammar | Variable . IDENTIFIER ${ { @@ -2132,6 +2058,8 @@ an executable. ###### Grammar + $TERM True False + $*val Value -> True ${ $0 = new_val(Tbool, $1); @@ -2253,6 +2181,8 @@ link to find the primary instance. ###### Grammar + $TERM : :: + $*var VariableDecl -> IDENTIFIER : ${ { struct variable *v = var_decl(c, $1.txt); @@ -2772,9 +2702,14 @@ expression operator, and the `CMPop` non-terminal will match one of them. ### Expressions: The rest -The remaining expressions with the highest precedence are arithmetic and -string concatenation. String concatenation (`++`) has the same -precedence as multiplication and division, but lower than the uniary. +The remaining expressions with the highest precedence are arithmetic, +string concatenation, and string conversion. String concatenation +(`++`) has the same precedence as multiplication and division, but lower +than the uniary. + +String conversion is a temporary feature until I get a better type +system. `$` is a prefix operator which expects a string and returns +a number. `+` and `-` are both infix and prefix operations (where they are absolute value and negation). These have different operator names. @@ -2788,12 +2723,13 @@ should only insert brackets were needed for precedence. Times, Divide, Rem, Concat, Absolute, Negate, + StringConv, Bracket, ###### expr precedence $LEFT + - Eop $LEFT * / % ++ Top - $LEFT Uop + $LEFT Uop $ $TERM ( ) ###### expression grammar @@ -2834,6 +2770,7 @@ should only insert brackets were needed for precedence. Uop -> + ${ $0.op = Absolute; }$ | - ${ $0.op = Negate; }$ + | $ ${ $0.op = StringConv; }$ Top -> * ${ $0.op = Times; }$ | / ${ $0.op = Divide; }$ @@ -2862,14 +2799,15 @@ should only insert brackets were needed for precedence. if (bracket) printf(")"); break; case Absolute: - if (bracket) printf("("); - printf("+"); - print_exec(b->right, indent, bracket); - if (bracket) printf(")"); - break; case Negate: + case StringConv: if (bracket) printf("("); - printf("-"); + switch (b->op) { + case Absolute: fputs("+", stdout); break; + case Negate: fputs("-", stdout); break; + case StringConv: fputs("$", stdout); break; + default: abort(); // NOTEST + } // NOTEST print_exec(b->right, indent, bracket); if (bracket) printf(")"); break; @@ -2906,6 +2844,15 @@ should only insert brackets were needed for precedence. Tstr, rules, type); return Tstr; + case StringConv: + /* op must be string, result is number */ + propagate_types(b->left, c, ok, Tstr, 0); + if (!type_compat(type, Tnum, 0)) + type_err(c, + "error: Can only convert string to number, not %1", + prog, type, 0, NULL); + return Tnum; + case Bracket: return propagate_types(b->right, c, ok, type, 0); @@ -2963,6 +2910,27 @@ should only insert brackets were needed for precedence. rvtype = Tstr; rv.str = text_join(left.str, right.str); break; + case StringConv: + right = interp_exec(b->right, &rvtype); + rtype = Tstr; + rvtype = Tnum; + + struct text tx = right.str; + char tail[3]; + int neg = 0; + if (tx.txt[0] == '-') { + neg = 1; + tx.txt++; + tx.len--; + } + if (number_parse(rv.num, tail, tx) == 0) + mpq_init(rv.num); + else if (neg) + mpq_neg(rv.num, rv.num); + if (tail[0]) + printf("Unsupported suffix: %.*s\n", tx.len, tx.txt); + + break; ###### value functions @@ -3032,11 +3000,10 @@ is in-place. ###### Binode types Block, -###### expr precedence - $TERM pass - ###### Grammar + $TERM { } ; + $*binode Block -> { IN OptNL Statementlist OUT OptNL } ${ $0 = $right = $<1; }$ + $TERM pass SimpleStatement -> pass ${ $0 = NULL; }$ | ERROR ${ tok_err(c, "Syntax error in statement", &$1); }$ ## SimpleStatement Grammar @@ -3281,6 +3249,9 @@ it is declared, and error will be raised as the name is created as Assign, Declare, +###### declare terminals + $TERM = + ###### SimpleStatement Grammar | Variable = Expression ${ $0 = new(binode); @@ -3396,14 +3367,13 @@ it is declared, and error will be raised as the name is created as struct variable *v = cast(var, b->left)->var; if (v->merged) v = v->merged; + free_value(v->type, v->val); + free(v->val); if (b->right) { right = interp_exec(b->right, &rtype); - free_value(v->type, v->val); - free(v->val); v->val = val_alloc(v->type, &right); rtype = Tnone; } else { - free_value(v->type, v->val); v->val = val_alloc(v->type, NULL); } break; @@ -3951,6 +3921,8 @@ various declarations in the parse context. $void Ocean -> OptNL DeclarationList + ## declare terminals + OptNL -> | OptNL NEWLINE Newlines -> NEWLINE @@ -3994,6 +3966,8 @@ searching through for the Nth constant for decreasing N. ###### top level grammar + $TERM const + DeclareConstant -> const { IN OptNL ConstList OUT OptNL } Newlines | const { SimpleConstList } Newlines | const IN OptNL ConstList OUT Newlines @@ -4100,6 +4074,8 @@ analysis is a bit more interesting at this level. c->prog = $<1; } }$ + $TERM program + $*binode Program -> program OpenScope Varlist ColonBlock Newlines ${ $0 = new(binode); @@ -4209,8 +4185,9 @@ analysis is a bit more interesting at this level. v->var->val = vl; } free_value(v->var->type, vl); - if (!parse_value(v->var->type, argv[0], vl)) - exit(1); + vl->str.len = strlen(argv[0]); + vl->str.txt = malloc(vl->str.len); + memcpy(vl->str.txt, argv[0], vl->str.len); argv++; } v = interp_exec(p->right, &vtype); @@ -4248,11 +4225,13 @@ things which will likely grow as the languages grows. name:string alive:Boolean - program A B: + program Astr Bstr: print "Hello World, what lovely oceans you have!" print "Are there", five, "?" print pi, pie, "but", cake + A := $Astr; B := $Bstr + /* When a variable is defined in both branches of an 'if', * and used afterwards, the variables are merged. */