diff --git a/test/fixtures/c/blob.c b/test/fixtures/c/blob.c new file mode 100644 index 00000000..ae320bd8 --- /dev/null +++ b/test/fixtures/c/blob.c @@ -0,0 +1,25 @@ +#include "cache.h" +#include "blob.h" + +const char *blob_type = "blob"; + +struct blob *lookup_blob(const unsigned char *sha1) +{ + struct object *obj = lookup_object(sha1); + if (!obj) + return create_object(sha1, OBJ_BLOB, alloc_blob_node()); + if (!obj->type) + obj->type = OBJ_BLOB; + if (obj->type != OBJ_BLOB) { + error("Object %s is a %s, not a blob", + sha1_to_hex(sha1), typename(obj->type)); + return NULL; + } + return (struct blob *) obj; +} + +int parse_blob_buffer(struct blob *item, void *buffer, unsigned long size) +{ + item->object.parsed = 1; + return 0; +} diff --git a/test/fixtures/c/cache.c b/test/fixtures/c/cache.c new file mode 100644 index 00000000..f8d89403 --- /dev/null +++ b/test/fixtures/c/cache.c @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2009-2012 the libgit2 contributors + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. 
+ */ + +#include "common.h" +#include "repository.h" +#include "commit.h" +#include "thread-utils.h" +#include "util.h" +#include "cache.h" + +int git_cache_init(git_cache *cache, size_t size, git_cached_obj_freeptr free_ptr) +{ + if (size < 8) + size = 8; + size = git__size_t_powerof2(size); + + cache->size_mask = size - 1; + cache->lru_count = 0; + cache->free_obj = free_ptr; + + git_mutex_init(&cache->lock); + + cache->nodes = git__malloc(size * sizeof(git_cached_obj *)); + GITERR_CHECK_ALLOC(cache->nodes); + + memset(cache->nodes, 0x0, size * sizeof(git_cached_obj *)); + return 0; +} + +void git_cache_free(git_cache *cache) +{ + size_t i; + + for (i = 0; i < (cache->size_mask + 1); ++i) { + if (cache->nodes[i] != NULL) + git_cached_obj_decref(cache->nodes[i], cache->free_obj); + } + + git__free(cache->nodes); +} + +void *git_cache_get(git_cache *cache, const git_oid *oid) +{ + uint32_t hash; + git_cached_obj *node = NULL, *result = NULL; + + memcpy(&hash, oid->id, sizeof(hash)); + + git_mutex_lock(&cache->lock); + { + node = cache->nodes[hash & cache->size_mask]; + + if (node != NULL && git_oid_cmp(&node->oid, oid) == 0) { + git_cached_obj_incref(node); + result = node; + } + } + git_mutex_unlock(&cache->lock); + + return result; +} + +void *git_cache_try_store(git_cache *cache, void *_entry) +{ + git_cached_obj *entry = _entry; + uint32_t hash; + + memcpy(&hash, &entry->oid, sizeof(uint32_t)); + + /* increase the refcount on this object, because + * the cache now owns it */ + git_cached_obj_incref(entry); + + git_mutex_lock(&cache->lock); + { + git_cached_obj *node = cache->nodes[hash & cache->size_mask]; + + if (node == NULL) { + cache->nodes[hash & cache->size_mask] = entry; + } else if (git_oid_cmp(&node->oid, &entry->oid) == 0) { + git_cached_obj_decref(entry, cache->free_obj); + entry = node; + } else { + git_cached_obj_decref(node, cache->free_obj); + cache->nodes[hash & cache->size_mask] = entry; + } + } + git_mutex_unlock(&cache->lock); + + /* increase 
the refcount again, because we are + * returning it to the user */ + git_cached_obj_incref(entry); + + return entry; +} diff --git a/test/fixtures/c/commit.c b/test/fixtures/c/commit.c new file mode 100644 index 00000000..8248a994 --- /dev/null +++ b/test/fixtures/c/commit.c @@ -0,0 +1,1228 @@ +#include "cache.h" +#include "tag.h" +#include "commit.h" +#include "pkt-line.h" +#include "utf8.h" +#include "diff.h" +#include "revision.h" +#include "notes.h" +#include "gpg-interface.h" +#include "mergesort.h" + +int save_commit_buffer = 1; + +const char *commit_type = "commit"; + +static struct commit *check_commit(struct object *obj, + const unsigned char *sha1, + int quiet) +{ + if (obj->type != OBJ_COMMIT) { + if (!quiet) + error("Object %s is a %s, not a commit", + sha1_to_hex(sha1), typename(obj->type)); + return NULL; + } + return (struct commit *) obj; +} + +struct commit *lookup_commit_reference_gently(const unsigned char *sha1, + int quiet) +{ + struct object *obj = deref_tag(parse_object(sha1), NULL, 0); + + if (!obj) + return NULL; + return check_commit(obj, sha1, quiet); +} + +struct commit *lookup_commit_reference(const unsigned char *sha1) +{ + return lookup_commit_reference_gently(sha1, 0); +} + +struct commit *lookup_commit_or_die(const unsigned char *sha1, const char *ref_name) +{ + struct commit *c = lookup_commit_reference(sha1); + if (!c) + die(_("could not parse %s"), ref_name); + if (hashcmp(sha1, c->object.sha1)) { + warning(_("%s %s is not a commit!"), + ref_name, sha1_to_hex(sha1)); + } + return c; +} + +struct commit *lookup_commit(const unsigned char *sha1) +{ + struct object *obj = lookup_object(sha1); + if (!obj) + return create_object(sha1, OBJ_COMMIT, alloc_commit_node()); + if (!obj->type) + obj->type = OBJ_COMMIT; + return check_commit(obj, sha1, 0); +} + +struct commit *lookup_commit_reference_by_name(const char *name) +{ + unsigned char sha1[20]; + struct commit *commit; + + if (get_sha1(name, sha1)) + return NULL; + commit = 
lookup_commit_reference(sha1); + if (!commit || parse_commit(commit)) + return NULL; + return commit; +} + +static unsigned long parse_commit_date(const char *buf, const char *tail) +{ + const char *dateptr; + + if (buf + 6 >= tail) + return 0; + if (memcmp(buf, "author", 6)) + return 0; + while (buf < tail && *buf++ != '\n') + /* nada */; + if (buf + 9 >= tail) + return 0; + if (memcmp(buf, "committer", 9)) + return 0; + while (buf < tail && *buf++ != '>') + /* nada */; + if (buf >= tail) + return 0; + dateptr = buf; + while (buf < tail && *buf++ != '\n') + /* nada */; + if (buf >= tail) + return 0; + /* dateptr < buf && buf[-1] == '\n', so strtoul will stop at buf-1 */ + return strtoul(dateptr, NULL, 10); +} + +static struct commit_graft **commit_graft; +static int commit_graft_alloc, commit_graft_nr; + +static int commit_graft_pos(const unsigned char *sha1) +{ + int lo, hi; + lo = 0; + hi = commit_graft_nr; + while (lo < hi) { + int mi = (lo + hi) / 2; + struct commit_graft *graft = commit_graft[mi]; + int cmp = hashcmp(sha1, graft->sha1); + if (!cmp) + return mi; + if (cmp < 0) + hi = mi; + else + lo = mi + 1; + } + return -lo - 1; +} + +int register_commit_graft(struct commit_graft *graft, int ignore_dups) +{ + int pos = commit_graft_pos(graft->sha1); + + if (0 <= pos) { + if (ignore_dups) + free(graft); + else { + free(commit_graft[pos]); + commit_graft[pos] = graft; + } + return 1; + } + pos = -pos - 1; + if (commit_graft_alloc <= ++commit_graft_nr) { + commit_graft_alloc = alloc_nr(commit_graft_alloc); + commit_graft = xrealloc(commit_graft, + sizeof(*commit_graft) * + commit_graft_alloc); + } + if (pos < commit_graft_nr) + memmove(commit_graft + pos + 1, + commit_graft + pos, + (commit_graft_nr - pos - 1) * + sizeof(*commit_graft)); + commit_graft[pos] = graft; + return 0; +} + +struct commit_graft *read_graft_line(char *buf, int len) +{ + /* The format is just "Commit Parent1 Parent2 ...\n" */ + int i; + struct commit_graft *graft = NULL; + + while (len 
&& isspace(buf[len-1])) + buf[--len] = '\0'; + if (buf[0] == '#' || buf[0] == '\0') + return NULL; + if ((len + 1) % 41) + goto bad_graft_data; + i = (len + 1) / 41 - 1; + graft = xmalloc(sizeof(*graft) + 20 * i); + graft->nr_parent = i; + if (get_sha1_hex(buf, graft->sha1)) + goto bad_graft_data; + for (i = 40; i < len; i += 41) { + if (buf[i] != ' ') + goto bad_graft_data; + if (get_sha1_hex(buf + i + 1, graft->parent[i/41])) + goto bad_graft_data; + } + return graft; + +bad_graft_data: + error("bad graft data: %s", buf); + free(graft); + return NULL; +} + +static int read_graft_file(const char *graft_file) +{ + FILE *fp = fopen(graft_file, "r"); + char buf[1024]; + if (!fp) + return -1; + while (fgets(buf, sizeof(buf), fp)) { + /* The format is just "Commit Parent1 Parent2 ...\n" */ + int len = strlen(buf); + struct commit_graft *graft = read_graft_line(buf, len); + if (!graft) + continue; + if (register_commit_graft(graft, 1)) + error("duplicate graft data: %s", buf); + } + fclose(fp); + return 0; +} + +static void prepare_commit_graft(void) +{ + static int commit_graft_prepared; + char *graft_file; + + if (commit_graft_prepared) + return; + graft_file = get_graft_file(); + read_graft_file(graft_file); + /* make sure shallows are read */ + is_repository_shallow(); + commit_graft_prepared = 1; +} + +struct commit_graft *lookup_commit_graft(const unsigned char *sha1) +{ + int pos; + prepare_commit_graft(); + pos = commit_graft_pos(sha1); + if (pos < 0) + return NULL; + return commit_graft[pos]; +} + +int for_each_commit_graft(each_commit_graft_fn fn, void *cb_data) +{ + int i, ret; + for (i = ret = 0; i < commit_graft_nr && !ret; i++) + ret = fn(commit_graft[i], cb_data); + return ret; +} + +int unregister_shallow(const unsigned char *sha1) +{ + int pos = commit_graft_pos(sha1); + if (pos < 0) + return -1; + if (pos + 1 < commit_graft_nr) + memmove(commit_graft + pos, commit_graft + pos + 1, + sizeof(struct commit_graft *) + * (commit_graft_nr - pos - 1)); + 
commit_graft_nr--; + return 0; +} + +int parse_commit_buffer(struct commit *item, const void *buffer, unsigned long size) +{ + const char *tail = buffer; + const char *bufptr = buffer; + unsigned char parent[20]; + struct commit_list **pptr; + struct commit_graft *graft; + + if (item->object.parsed) + return 0; + item->object.parsed = 1; + tail += size; + if (tail <= bufptr + 46 || memcmp(bufptr, "tree ", 5) || bufptr[45] != '\n') + return error("bogus commit object %s", sha1_to_hex(item->object.sha1)); + if (get_sha1_hex(bufptr + 5, parent) < 0) + return error("bad tree pointer in commit %s", + sha1_to_hex(item->object.sha1)); + item->tree = lookup_tree(parent); + bufptr += 46; /* "tree " + "hex sha1" + "\n" */ + pptr = &item->parents; + + graft = lookup_commit_graft(item->object.sha1); + while (bufptr + 48 < tail && !memcmp(bufptr, "parent ", 7)) { + struct commit *new_parent; + + if (tail <= bufptr + 48 || + get_sha1_hex(bufptr + 7, parent) || + bufptr[47] != '\n') + return error("bad parents in commit %s", sha1_to_hex(item->object.sha1)); + bufptr += 48; + /* + * The clone is shallow if nr_parent < 0, and we must + * not traverse its real parents even when we unhide them. 
+ */ + if (graft && (graft->nr_parent < 0 || grafts_replace_parents)) + continue; + new_parent = lookup_commit(parent); + if (new_parent) + pptr = &commit_list_insert(new_parent, pptr)->next; + } + if (graft) { + int i; + struct commit *new_parent; + for (i = 0; i < graft->nr_parent; i++) { + new_parent = lookup_commit(graft->parent[i]); + if (!new_parent) + continue; + pptr = &commit_list_insert(new_parent, pptr)->next; + } + } + item->date = parse_commit_date(bufptr, tail); + + return 0; +} + +int parse_commit(struct commit *item) +{ + enum object_type type; + void *buffer; + unsigned long size; + int ret; + + if (!item) + return -1; + if (item->object.parsed) + return 0; + buffer = read_sha1_file(item->object.sha1, &type, &size); + if (!buffer) + return error("Could not read %s", + sha1_to_hex(item->object.sha1)); + if (type != OBJ_COMMIT) { + free(buffer); + return error("Object %s not a commit", + sha1_to_hex(item->object.sha1)); + } + ret = parse_commit_buffer(item, buffer, size); + if (save_commit_buffer && !ret) { + item->buffer = buffer; + return 0; + } + free(buffer); + return ret; +} + +int find_commit_subject(const char *commit_buffer, const char **subject) +{ + const char *eol; + const char *p = commit_buffer; + + while (*p && (*p != '\n' || p[1] != '\n')) + p++; + if (*p) { + p += 2; + for (eol = p; *eol && *eol != '\n'; eol++) + ; /* do nothing */ + } else + eol = p; + + *subject = p; + + return eol - p; +} + +struct commit_list *commit_list_insert(struct commit *item, struct commit_list **list_p) +{ + struct commit_list *new_list = xmalloc(sizeof(struct commit_list)); + new_list->item = item; + new_list->next = *list_p; + *list_p = new_list; + return new_list; +} + +unsigned commit_list_count(const struct commit_list *l) +{ + unsigned c = 0; + for (; l; l = l->next ) + c++; + return c; +} + +void free_commit_list(struct commit_list *list) +{ + while (list) { + struct commit_list *temp = list; + list = temp->next; + free(temp); + } +} + +struct 
commit_list * commit_list_insert_by_date(struct commit *item, struct commit_list **list) +{ + struct commit_list **pp = list; + struct commit_list *p; + while ((p = *pp) != NULL) { + if (p->item->date < item->date) { + break; + } + pp = &p->next; + } + return commit_list_insert(item, pp); +} + +static int commit_list_compare_by_date(const void *a, const void *b) +{ + unsigned long a_date = ((const struct commit_list *)a)->item->date; + unsigned long b_date = ((const struct commit_list *)b)->item->date; + if (a_date < b_date) + return 1; + if (a_date > b_date) + return -1; + return 0; +} + +static void *commit_list_get_next(const void *a) +{ + return ((const struct commit_list *)a)->next; +} + +static void commit_list_set_next(void *a, void *next) +{ + ((struct commit_list *)a)->next = next; +} + +void commit_list_sort_by_date(struct commit_list **list) +{ + *list = llist_mergesort(*list, commit_list_get_next, commit_list_set_next, + commit_list_compare_by_date); +} + +struct commit *pop_most_recent_commit(struct commit_list **list, + unsigned int mark) +{ + struct commit *ret = (*list)->item; + struct commit_list *parents = ret->parents; + struct commit_list *old = *list; + + *list = (*list)->next; + free(old); + + while (parents) { + struct commit *commit = parents->item; + if (!parse_commit(commit) && !(commit->object.flags & mark)) { + commit->object.flags |= mark; + commit_list_insert_by_date(commit, list); + } + parents = parents->next; + } + return ret; +} + +static void clear_commit_marks_1(struct commit_list **plist, + struct commit *commit, unsigned int mark) +{ + while (commit) { + struct commit_list *parents; + + if (!(mark & commit->object.flags)) + return; + + commit->object.flags &= ~mark; + + parents = commit->parents; + if (!parents) + return; + + while ((parents = parents->next)) + commit_list_insert(parents->item, plist); + + commit = commit->parents->item; + } +} + +void clear_commit_marks(struct commit *commit, unsigned int mark) +{ + struct 
commit_list *list = NULL; + commit_list_insert(commit, &list); + while (list) + clear_commit_marks_1(&list, pop_commit(&list), mark); +} + +void clear_commit_marks_for_object_array(struct object_array *a, unsigned mark) +{ + struct object *object; + struct commit *commit; + unsigned int i; + + for (i = 0; i < a->nr; i++) { + object = a->objects[i].item; + commit = lookup_commit_reference_gently(object->sha1, 1); + if (commit) + clear_commit_marks(commit, mark); + } +} + +struct commit *pop_commit(struct commit_list **stack) +{ + struct commit_list *top = *stack; + struct commit *item = top ? top->item : NULL; + + if (top) { + *stack = top->next; + free(top); + } + return item; +} + +/* + * Performs an in-place topological sort on the list supplied. + */ +void sort_in_topological_order(struct commit_list ** list, int lifo) +{ + struct commit_list *next, *orig = *list; + struct commit_list *work, **insert; + struct commit_list **pptr; + + if (!orig) + return; + *list = NULL; + + /* Mark them and clear the indegree */ + for (next = orig; next; next = next->next) { + struct commit *commit = next->item; + commit->indegree = 1; + } + + /* update the indegree */ + for (next = orig; next; next = next->next) { + struct commit_list * parents = next->item->parents; + while (parents) { + struct commit *parent = parents->item; + + if (parent->indegree) + parent->indegree++; + parents = parents->next; + } + } + + /* + * find the tips + * + * tips are nodes not reachable from any other node in the list + * + * the tips serve as a starting set for the work queue. 
+ */ + work = NULL; + insert = &work; + for (next = orig; next; next = next->next) { + struct commit *commit = next->item; + + if (commit->indegree == 1) + insert = &commit_list_insert(commit, insert)->next; + } + + /* process the list in topological order */ + if (!lifo) + commit_list_sort_by_date(&work); + + pptr = list; + *list = NULL; + while (work) { + struct commit *commit; + struct commit_list *parents, *work_item; + + work_item = work; + work = work_item->next; + work_item->next = NULL; + + commit = work_item->item; + for (parents = commit->parents; parents ; parents = parents->next) { + struct commit *parent = parents->item; + + if (!parent->indegree) + continue; + + /* + * parents are only enqueued for emission + * when all their children have been emitted thereby + * guaranteeing topological order. + */ + if (--parent->indegree == 1) { + if (!lifo) + commit_list_insert_by_date(parent, &work); + else + commit_list_insert(parent, &work); + } + } + /* + * work_item is a commit all of whose children + * have already been emitted. we can emit it now. + */ + commit->indegree = 0; + *pptr = work_item; + pptr = &work_item->next; + } +} + +/* merge-base stuff */ + +/* bits #0..15 in revision.h */ +#define PARENT1 (1u<<16) +#define PARENT2 (1u<<17) +#define STALE (1u<<18) +#define RESULT (1u<<19) + +static const unsigned all_flags = (PARENT1 | PARENT2 | STALE | RESULT); + +static struct commit *interesting(struct commit_list *list) +{ + while (list) { + struct commit *commit = list->item; + list = list->next; + if (commit->object.flags & STALE) + continue; + return commit; + } + return NULL; +} + +static struct commit_list *merge_bases_many(struct commit *one, int n, struct commit **twos) +{ + struct commit_list *list = NULL; + struct commit_list *result = NULL; + int i; + + for (i = 0; i < n; i++) { + if (one == twos[i]) + /* + * We do not mark this even with RESULT so we do not + * have to clean it up. 
+ */ + return commit_list_insert(one, &result); + } + + if (parse_commit(one)) + return NULL; + for (i = 0; i < n; i++) { + if (parse_commit(twos[i])) + return NULL; + } + + one->object.flags |= PARENT1; + commit_list_insert_by_date(one, &list); + for (i = 0; i < n; i++) { + twos[i]->object.flags |= PARENT2; + commit_list_insert_by_date(twos[i], &list); + } + + while (interesting(list)) { + struct commit *commit; + struct commit_list *parents; + struct commit_list *next; + int flags; + + commit = list->item; + next = list->next; + free(list); + list = next; + + flags = commit->object.flags & (PARENT1 | PARENT2 | STALE); + if (flags == (PARENT1 | PARENT2)) { + if (!(commit->object.flags & RESULT)) { + commit->object.flags |= RESULT; + commit_list_insert_by_date(commit, &result); + } + /* Mark parents of a found merge stale */ + flags |= STALE; + } + parents = commit->parents; + while (parents) { + struct commit *p = parents->item; + parents = parents->next; + if ((p->object.flags & flags) == flags) + continue; + if (parse_commit(p)) + return NULL; + p->object.flags |= flags; + commit_list_insert_by_date(p, &list); + } + } + + /* Clean up the result to remove stale ones */ + free_commit_list(list); + list = result; result = NULL; + while (list) { + struct commit_list *next = list->next; + if (!(list->item->object.flags & STALE)) + commit_list_insert_by_date(list->item, &result); + free(list); + list = next; + } + return result; +} + +struct commit_list *get_octopus_merge_bases(struct commit_list *in) +{ + struct commit_list *i, *j, *k, *ret = NULL; + struct commit_list **pptr = &ret; + + for (i = in; i; i = i->next) { + if (!ret) + pptr = &commit_list_insert(i->item, pptr)->next; + else { + struct commit_list *new = NULL, *end = NULL; + + for (j = ret; j; j = j->next) { + struct commit_list *bases; + bases = get_merge_bases(i->item, j->item, 1); + if (!new) + new = bases; + else + end->next = bases; + for (k = bases; k; k = k->next) + end = k; + } + ret = new; + } + 
} + return ret; +} + +struct commit_list *get_merge_bases_many(struct commit *one, + int n, + struct commit **twos, + int cleanup) +{ + struct commit_list *list; + struct commit **rslt; + struct commit_list *result; + int cnt, i, j; + + result = merge_bases_many(one, n, twos); + for (i = 0; i < n; i++) { + if (one == twos[i]) + return result; + } + if (!result || !result->next) { + if (cleanup) { + clear_commit_marks(one, all_flags); + for (i = 0; i < n; i++) + clear_commit_marks(twos[i], all_flags); + } + return result; + } + + /* There are more than one */ + cnt = 0; + list = result; + while (list) { + list = list->next; + cnt++; + } + rslt = xcalloc(cnt, sizeof(*rslt)); + for (list = result, i = 0; list; list = list->next) + rslt[i++] = list->item; + free_commit_list(result); + + clear_commit_marks(one, all_flags); + for (i = 0; i < n; i++) + clear_commit_marks(twos[i], all_flags); + for (i = 0; i < cnt - 1; i++) { + for (j = i+1; j < cnt; j++) { + if (!rslt[i] || !rslt[j]) + continue; + result = merge_bases_many(rslt[i], 1, &rslt[j]); + clear_commit_marks(rslt[i], all_flags); + clear_commit_marks(rslt[j], all_flags); + for (list = result; list; list = list->next) { + if (rslt[i] == list->item) + rslt[i] = NULL; + if (rslt[j] == list->item) + rslt[j] = NULL; + } + } + } + + /* Surviving ones in rslt[] are the independent results */ + result = NULL; + for (i = 0; i < cnt; i++) { + if (rslt[i]) + commit_list_insert_by_date(rslt[i], &result); + } + free(rslt); + return result; +} + +struct commit_list *get_merge_bases(struct commit *one, struct commit *two, + int cleanup) +{ + return get_merge_bases_many(one, 1, &two, cleanup); +} + +int is_descendant_of(struct commit *commit, struct commit_list *with_commit) +{ + if (!with_commit) + return 1; + while (with_commit) { + struct commit *other; + + other = with_commit->item; + with_commit = with_commit->next; + if (in_merge_bases(other, &commit, 1)) + return 1; + } + return 0; +} + +int in_merge_bases(struct commit 
*commit, struct commit **reference, int num) +{ + struct commit_list *bases, *b; + int ret = 0; + + if (num == 1) + bases = get_merge_bases(commit, *reference, 1); + else + die("not yet"); + for (b = bases; b; b = b->next) { + if (!hashcmp(commit->object.sha1, b->item->object.sha1)) { + ret = 1; + break; + } + } + + free_commit_list(bases); + return ret; +} + +struct commit_list *reduce_heads(struct commit_list *heads) +{ + struct commit_list *p; + struct commit_list *result = NULL, **tail = &result; + struct commit **other; + size_t num_head, num_other; + + if (!heads) + return NULL; + + /* Avoid unnecessary reallocations */ + for (p = heads, num_head = 0; p; p = p->next) + num_head++; + other = xcalloc(sizeof(*other), num_head); + + /* For each commit, see if it can be reached by others */ + for (p = heads; p; p = p->next) { + struct commit_list *q, *base; + + /* Do we already have this in the result? */ + for (q = result; q; q = q->next) + if (p->item == q->item) + break; + if (q) + continue; + + num_other = 0; + for (q = heads; q; q = q->next) { + if (p->item == q->item) + continue; + other[num_other++] = q->item; + } + if (num_other) + base = get_merge_bases_many(p->item, num_other, other, 1); + else + base = NULL; + /* + * If p->item does not have anything common with other + * commits, there won't be any merge base. If it is + * reachable from some of the others, p->item will be + * the merge base. If its history is connected with + * others, but p->item is not reachable by others, we + * will get something other than p->item back. 
+ */ + if (!base || (base->item != p->item)) + tail = &(commit_list_insert(p->item, tail)->next); + free_commit_list(base); + } + free(other); + return result; +} + +static const char gpg_sig_header[] = "gpgsig"; +static const int gpg_sig_header_len = sizeof(gpg_sig_header) - 1; + +static int do_sign_commit(struct strbuf *buf, const char *keyid) +{ + struct strbuf sig = STRBUF_INIT; + int inspos, copypos; + + /* find the end of the header */ + inspos = strstr(buf->buf, "\n\n") - buf->buf + 1; + + if (!keyid || !*keyid) + keyid = get_signing_key(); + if (sign_buffer(buf, &sig, keyid)) { + strbuf_release(&sig); + return -1; + } + + for (copypos = 0; sig.buf[copypos]; ) { + const char *bol = sig.buf + copypos; + const char *eol = strchrnul(bol, '\n'); + int len = (eol - bol) + !!*eol; + + if (!copypos) { + strbuf_insert(buf, inspos, gpg_sig_header, gpg_sig_header_len); + inspos += gpg_sig_header_len; + } + strbuf_insert(buf, inspos++, " ", 1); + strbuf_insert(buf, inspos, bol, len); + inspos += len; + copypos += len; + } + strbuf_release(&sig); + return 0; +} + +int parse_signed_commit(const unsigned char *sha1, + struct strbuf *payload, struct strbuf *signature) +{ + unsigned long size; + enum object_type type; + char *buffer = read_sha1_file(sha1, &type, &size); + int in_signature, saw_signature = -1; + char *line, *tail; + + if (!buffer || type != OBJ_COMMIT) + goto cleanup; + + line = buffer; + tail = buffer + size; + in_signature = 0; + saw_signature = 0; + while (line < tail) { + const char *sig = NULL; + char *next = memchr(line, '\n', tail - line); + + next = next ? 
next + 1 : tail; + if (in_signature && line[0] == ' ') + sig = line + 1; + else if (!prefixcmp(line, gpg_sig_header) && + line[gpg_sig_header_len] == ' ') + sig = line + gpg_sig_header_len + 1; + if (sig) { + strbuf_add(signature, sig, next - sig); + saw_signature = 1; + in_signature = 1; + } else { + if (*line == '\n') + /* dump the whole remainder of the buffer */ + next = tail; + strbuf_add(payload, line, next - line); + in_signature = 0; + } + line = next; + } + cleanup: + free(buffer); + return saw_signature; +} + +static void handle_signed_tag(struct commit *parent, struct commit_extra_header ***tail) +{ + struct merge_remote_desc *desc; + struct commit_extra_header *mergetag; + char *buf; + unsigned long size, len; + enum object_type type; + + desc = merge_remote_util(parent); + if (!desc || !desc->obj) + return; + buf = read_sha1_file(desc->obj->sha1, &type, &size); + if (!buf || type != OBJ_TAG) + goto free_return; + len = parse_signature(buf, size); + if (size == len) + goto free_return; + /* + * We could verify this signature and either omit the tag when + * it does not validate, but the integrator may not have the + * public key of the signer of the tag he is merging, while a + * later auditor may have it while auditing, so let's not run + * verify-signed-buffer here for now... 
+ * + * if (verify_signed_buffer(buf, len, buf + len, size - len, ...)) + * warn("warning: signed tag unverified."); + */ + mergetag = xcalloc(1, sizeof(*mergetag)); + mergetag->key = xstrdup("mergetag"); + mergetag->value = buf; + mergetag->len = size; + + **tail = mergetag; + *tail = &mergetag->next; + return; + +free_return: + free(buf); +} + +void append_merge_tag_headers(struct commit_list *parents, + struct commit_extra_header ***tail) +{ + while (parents) { + struct commit *parent = parents->item; + handle_signed_tag(parent, tail); + parents = parents->next; + } +} + +static void add_extra_header(struct strbuf *buffer, + struct commit_extra_header *extra) +{ + strbuf_addstr(buffer, extra->key); + if (extra->len) + strbuf_add_lines(buffer, " ", extra->value, extra->len); + else + strbuf_addch(buffer, '\n'); +} + +struct commit_extra_header *read_commit_extra_headers(struct commit *commit, + const char **exclude) +{ + struct commit_extra_header *extra = NULL; + unsigned long size; + enum object_type type; + char *buffer = read_sha1_file(commit->object.sha1, &type, &size); + if (buffer && type == OBJ_COMMIT) + extra = read_commit_extra_header_lines(buffer, size, exclude); + free(buffer); + return extra; +} + +static inline int standard_header_field(const char *field, size_t len) +{ + return ((len == 4 && !memcmp(field, "tree ", 5)) || + (len == 6 && !memcmp(field, "parent ", 7)) || + (len == 6 && !memcmp(field, "author ", 7)) || + (len == 9 && !memcmp(field, "committer ", 10)) || + (len == 8 && !memcmp(field, "encoding ", 9))); +} + +static int excluded_header_field(const char *field, size_t len, const char **exclude) +{ + if (!exclude) + return 0; + + while (*exclude) { + size_t xlen = strlen(*exclude); + if (len == xlen && + !memcmp(field, *exclude, xlen) && field[xlen] == ' ') + return 1; + exclude++; + } + return 0; +} + +struct commit_extra_header *read_commit_extra_header_lines(const char *buffer, size_t size, + const char **exclude) +{ + struct 
commit_extra_header *extra = NULL, **tail = &extra, *it = NULL; + const char *line, *next, *eof, *eob; + struct strbuf buf = STRBUF_INIT; + + for (line = buffer, eob = line + size; + line < eob && *line != '\n'; + line = next) { + next = memchr(line, '\n', eob - line); + next = next ? next + 1 : eob; + if (*line == ' ') { + /* continuation */ + if (it) + strbuf_add(&buf, line + 1, next - (line + 1)); + continue; + } + if (it) + it->value = strbuf_detach(&buf, &it->len); + strbuf_reset(&buf); + it = NULL; + + eof = strchr(line, ' '); + if (next <= eof) + eof = next; + + if (standard_header_field(line, eof - line) || + excluded_header_field(line, eof - line, exclude)) + continue; + + it = xcalloc(1, sizeof(*it)); + it->key = xmemdupz(line, eof-line); + *tail = it; + tail = &it->next; + if (eof + 1 < next) + strbuf_add(&buf, eof + 1, next - (eof + 1)); + } + if (it) + it->value = strbuf_detach(&buf, &it->len); + return extra; +} + +void free_commit_extra_headers(struct commit_extra_header *extra) +{ + while (extra) { + struct commit_extra_header *next = extra->next; + free(extra->key); + free(extra->value); + free(extra); + extra = next; + } +} + +int commit_tree(const struct strbuf *msg, unsigned char *tree, + struct commit_list *parents, unsigned char *ret, + const char *author, const char *sign_commit) +{ + struct commit_extra_header *extra = NULL, **tail = &extra; + int result; + + append_merge_tag_headers(parents, &tail); + result = commit_tree_extended(msg, tree, parents, ret, + author, sign_commit, extra); + free_commit_extra_headers(extra); + return result; +} + +static const char commit_utf8_warn[] = +"Warning: commit message does not conform to UTF-8.\n" +"You may want to amend it after fixing the message, or set the config\n" +"variable i18n.commitencoding to the encoding your project uses.\n"; + +int commit_tree_extended(const struct strbuf *msg, unsigned char *tree, + struct commit_list *parents, unsigned char *ret, + const char *author, const char 
*sign_commit, + struct commit_extra_header *extra) +{ + int result; + int encoding_is_utf8; + struct strbuf buffer; + + assert_sha1_type(tree, OBJ_TREE); + + if (memchr(msg->buf, '\0', msg->len)) + return error("a NUL byte in commit log message not allowed."); + + /* Not having i18n.commitencoding is the same as having utf-8 */ + encoding_is_utf8 = is_encoding_utf8(git_commit_encoding); + + strbuf_init(&buffer, 8192); /* should avoid reallocs for the headers */ + strbuf_addf(&buffer, "tree %s\n", sha1_to_hex(tree)); + + /* + * NOTE! This ordering means that the same exact tree merged with a + * different order of parents will be a _different_ changeset even + * if everything else stays the same. + */ + while (parents) { + struct commit_list *next = parents->next; + struct commit *parent = parents->item; + + strbuf_addf(&buffer, "parent %s\n", + sha1_to_hex(parent->object.sha1)); + free(parents); + parents = next; + } + + /* Person/date information */ + if (!author) + author = git_author_info(IDENT_STRICT); + strbuf_addf(&buffer, "author %s\n", author); + strbuf_addf(&buffer, "committer %s\n", git_committer_info(IDENT_STRICT)); + if (!encoding_is_utf8) + strbuf_addf(&buffer, "encoding %s\n", git_commit_encoding); + + while (extra) { + add_extra_header(&buffer, extra); + extra = extra->next; + } + strbuf_addch(&buffer, '\n'); + + /* And add the comment */ + strbuf_addbuf(&buffer, msg); + + /* And check the encoding */ + if (encoding_is_utf8 && !is_utf8(buffer.buf)) + fprintf(stderr, commit_utf8_warn); + + if (sign_commit && do_sign_commit(&buffer, sign_commit)) + return -1; + + result = write_sha1_file(buffer.buf, buffer.len, commit_type, ret); + strbuf_release(&buffer); + return result; +} + +struct commit *get_merge_parent(const char *name) +{ + struct object *obj; + struct commit *commit; + unsigned char sha1[20]; + if (get_sha1(name, sha1)) + return NULL; + obj = parse_object(sha1); + commit = (struct commit *)peel_to_type(name, 0, obj, OBJ_COMMIT); + if (commit 
&& !commit->util) { + struct merge_remote_desc *desc; + desc = xmalloc(sizeof(*desc)); + desc->obj = obj; + desc->name = strdup(name); + commit->util = desc; + } + return commit; +} + +/* + * Append a commit to the end of the commit_list. + * + * next starts by pointing to the variable that holds the head of an + * empty commit_list, and is updated to point to the "next" field of + * the last item on the list as new commits are appended. + * + * Usage example: + * + * struct commit_list *list; + * struct commit_list **next = &list; + * + * next = commit_list_append(c1, next); + * next = commit_list_append(c2, next); + * assert(commit_list_count(list) == 2); + * return list; + */ +struct commit_list **commit_list_append(struct commit *commit, + struct commit_list **next) +{ + struct commit_list *new = xmalloc(sizeof(struct commit_list)); + new->item = commit; + *next = new; + new->next = NULL; + return &new->next; +} diff --git a/test/fixtures/c/cpu.c b/test/fixtures/c/cpu.c new file mode 100644 index 00000000..a4eb5227 --- /dev/null +++ b/test/fixtures/c/cpu.c @@ -0,0 +1,725 @@ +/* CPU control. + * (C) 2001, 2002, 2003, 2004 Rusty Russell + * + * This code is licenced under the GPL. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "smpboot.h" + +#ifdef CONFIG_SMP +/* Serializes the updates to cpu_online_mask, cpu_present_mask */ +static DEFINE_MUTEX(cpu_add_remove_lock); + +/* + * The following two API's must be used when attempting + * to serialize the updates to cpu_online_mask, cpu_present_mask. + */ +void cpu_maps_update_begin(void) +{ + mutex_lock(&cpu_add_remove_lock); +} + +void cpu_maps_update_done(void) +{ + mutex_unlock(&cpu_add_remove_lock); +} + +static RAW_NOTIFIER_HEAD(cpu_chain); + +/* If set, cpu_up and cpu_down will return -EBUSY and do nothing. 
+ * Should always be manipulated under cpu_add_remove_lock + */ +static int cpu_hotplug_disabled; + +#ifdef CONFIG_HOTPLUG_CPU + +static struct { + struct task_struct *active_writer; + struct mutex lock; /* Synchronizes accesses to refcount, */ + /* + * Also blocks the new readers during + * an ongoing cpu hotplug operation. + */ + int refcount; +} cpu_hotplug = { + .active_writer = NULL, + .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock), + .refcount = 0, +}; + +void get_online_cpus(void) +{ + might_sleep(); + if (cpu_hotplug.active_writer == current) + return; + mutex_lock(&cpu_hotplug.lock); + cpu_hotplug.refcount++; + mutex_unlock(&cpu_hotplug.lock); + +} +EXPORT_SYMBOL_GPL(get_online_cpus); + +void put_online_cpus(void) +{ + if (cpu_hotplug.active_writer == current) + return; + mutex_lock(&cpu_hotplug.lock); + if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer)) + wake_up_process(cpu_hotplug.active_writer); + mutex_unlock(&cpu_hotplug.lock); + +} +EXPORT_SYMBOL_GPL(put_online_cpus); + +/* + * This ensures that the hotplug operation can begin only when the + * refcount goes to zero. + * + * Note that during a cpu-hotplug operation, the new readers, if any, + * will be blocked by the cpu_hotplug.lock + * + * Since cpu_hotplug_begin() is always called after invoking + * cpu_maps_update_begin(), we can be sure that only one writer is active. + * + * Note that theoretically, there is a possibility of a livelock: + * - Refcount goes to zero, last reader wakes up the sleeping + * writer. + * - Last reader unlocks the cpu_hotplug.lock. + * - A new reader arrives at this moment, bumps up the refcount. + * - The writer acquires the cpu_hotplug.lock finds the refcount + * non zero and goes to sleep again. + * + * However, this is very difficult to achieve in practice since + * get_online_cpus() not an api which is called all that often. 
+ * + */ +static void cpu_hotplug_begin(void) +{ + cpu_hotplug.active_writer = current; + + for (;;) { + mutex_lock(&cpu_hotplug.lock); + if (likely(!cpu_hotplug.refcount)) + break; + __set_current_state(TASK_UNINTERRUPTIBLE); + mutex_unlock(&cpu_hotplug.lock); + schedule(); + } +} + +static void cpu_hotplug_done(void) +{ + cpu_hotplug.active_writer = NULL; + mutex_unlock(&cpu_hotplug.lock); +} + +#else /* #if CONFIG_HOTPLUG_CPU */ +static void cpu_hotplug_begin(void) {} +static void cpu_hotplug_done(void) {} +#endif /* #else #if CONFIG_HOTPLUG_CPU */ + +/* Need to know about CPUs going up/down? */ +int __ref register_cpu_notifier(struct notifier_block *nb) +{ + int ret; + cpu_maps_update_begin(); + ret = raw_notifier_chain_register(&cpu_chain, nb); + cpu_maps_update_done(); + return ret; +} + +static int __cpu_notify(unsigned long val, void *v, int nr_to_call, + int *nr_calls) +{ + int ret; + + ret = __raw_notifier_call_chain(&cpu_chain, val, v, nr_to_call, + nr_calls); + + return notifier_to_errno(ret); +} + +static int cpu_notify(unsigned long val, void *v) +{ + return __cpu_notify(val, v, -1, NULL); +} + +#ifdef CONFIG_HOTPLUG_CPU + +static void cpu_notify_nofail(unsigned long val, void *v) +{ + BUG_ON(cpu_notify(val, v)); +} +EXPORT_SYMBOL(register_cpu_notifier); + +void __ref unregister_cpu_notifier(struct notifier_block *nb) +{ + cpu_maps_update_begin(); + raw_notifier_chain_unregister(&cpu_chain, nb); + cpu_maps_update_done(); +} +EXPORT_SYMBOL(unregister_cpu_notifier); + +/** + * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU + * @cpu: a CPU id + * + * This function walks all processes, finds a valid mm struct for each one and + * then clears a corresponding bit in mm's cpumask. While this all sounds + * trivial, there are various non-obvious corner cases, which this function + * tries to solve in a safe manner. 
+ * + * Also note that the function uses a somewhat relaxed locking scheme, so it may + * be called only for an already offlined CPU. + */ +void clear_tasks_mm_cpumask(int cpu) +{ + struct task_struct *p; + + /* + * This function is called after the cpu is taken down and marked + * offline, so its not like new tasks will ever get this cpu set in + * their mm mask. -- Peter Zijlstra + * Thus, we may use rcu_read_lock() here, instead of grabbing + * full-fledged tasklist_lock. + */ + WARN_ON(cpu_online(cpu)); + rcu_read_lock(); + for_each_process(p) { + struct task_struct *t; + + /* + * Main thread might exit, but other threads may still have + * a valid mm. Find one. + */ + t = find_lock_task_mm(p); + if (!t) + continue; + cpumask_clear_cpu(cpu, mm_cpumask(t->mm)); + task_unlock(t); + } + rcu_read_unlock(); +} + +static inline void check_for_tasks(int cpu) +{ + struct task_struct *p; + + write_lock_irq(&tasklist_lock); + for_each_process(p) { + if (task_cpu(p) == cpu && p->state == TASK_RUNNING && + (p->utime || p->stime)) + printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d " + "(state = %ld, flags = %x)\n", + p->comm, task_pid_nr(p), cpu, + p->state, p->flags); + } + write_unlock_irq(&tasklist_lock); +} + +struct take_cpu_down_param { + unsigned long mod; + void *hcpu; +}; + +/* Take this CPU down. */ +static int __ref take_cpu_down(void *_param) +{ + struct take_cpu_down_param *param = _param; + int err; + + /* Ensure this CPU doesn't handle any more interrupts. */ + err = __cpu_disable(); + if (err < 0) + return err; + + cpu_notify(CPU_DYING | param->mod, param->hcpu); + return 0; +} + +/* Requires cpu_add_remove_lock to be held */ +static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) +{ + int err, nr_calls = 0; + void *hcpu = (void *)(long)cpu; + unsigned long mod = tasks_frozen ? 
CPU_TASKS_FROZEN : 0; + struct take_cpu_down_param tcd_param = { + .mod = mod, + .hcpu = hcpu, + }; + + if (num_online_cpus() == 1) + return -EBUSY; + + if (!cpu_online(cpu)) + return -EINVAL; + + cpu_hotplug_begin(); + + err = __cpu_notify(CPU_DOWN_PREPARE | mod, hcpu, -1, &nr_calls); + if (err) { + nr_calls--; + __cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL); + printk("%s: attempt to take down CPU %u failed\n", + __func__, cpu); + goto out_release; + } + + err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu)); + if (err) { + /* CPU didn't die: tell everyone. Can't complain. */ + cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu); + + goto out_release; + } + BUG_ON(cpu_online(cpu)); + + /* + * The migration_call() CPU_DYING callback will have removed all + * runnable tasks from the cpu, there's only the idle task left now + * that the migration thread is done doing the stop_machine thing. + * + * Wait for the stop thread to go away. + */ + while (!idle_cpu(cpu)) + cpu_relax(); + + /* This actually kills the CPU. */ + __cpu_die(cpu); + + /* CPU is completely dead: tell everyone. Too late to complain. */ + cpu_notify_nofail(CPU_DEAD | mod, hcpu); + + check_for_tasks(cpu); + +out_release: + cpu_hotplug_done(); + if (!err) + cpu_notify_nofail(CPU_POST_DEAD | mod, hcpu); + return err; +} + +int __ref cpu_down(unsigned int cpu) +{ + int err; + + cpu_maps_update_begin(); + + if (cpu_hotplug_disabled) { + err = -EBUSY; + goto out; + } + + err = _cpu_down(cpu, 0); + +out: + cpu_maps_update_done(); + return err; +} +EXPORT_SYMBOL(cpu_down); +#endif /*CONFIG_HOTPLUG_CPU*/ + +/* Requires cpu_add_remove_lock to be held */ +static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen) +{ + int ret, nr_calls = 0; + void *hcpu = (void *)(long)cpu; + unsigned long mod = tasks_frozen ? 
CPU_TASKS_FROZEN : 0; + struct task_struct *idle; + + if (cpu_online(cpu) || !cpu_present(cpu)) + return -EINVAL; + + cpu_hotplug_begin(); + + idle = idle_thread_get(cpu); + if (IS_ERR(idle)) { + ret = PTR_ERR(idle); + goto out; + } + + ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls); + if (ret) { + nr_calls--; + printk(KERN_WARNING "%s: attempt to bring up CPU %u failed\n", + __func__, cpu); + goto out_notify; + } + + /* Arch-specific enabling code. */ + ret = __cpu_up(cpu, idle); + if (ret != 0) + goto out_notify; + BUG_ON(!cpu_online(cpu)); + + /* Now call notifier in preparation. */ + cpu_notify(CPU_ONLINE | mod, hcpu); + +out_notify: + if (ret != 0) + __cpu_notify(CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL); +out: + cpu_hotplug_done(); + + return ret; +} + +int __cpuinit cpu_up(unsigned int cpu) +{ + int err = 0; + +#ifdef CONFIG_MEMORY_HOTPLUG + int nid; + pg_data_t *pgdat; +#endif + + if (!cpu_possible(cpu)) { + printk(KERN_ERR "can't online cpu %d because it is not " + "configured as may-hotadd at boot time\n", cpu); +#if defined(CONFIG_IA64) + printk(KERN_ERR "please check additional_cpus= boot " + "parameter\n"); +#endif + return -EINVAL; + } + +#ifdef CONFIG_MEMORY_HOTPLUG + nid = cpu_to_node(cpu); + if (!node_online(nid)) { + err = mem_online_node(nid); + if (err) + return err; + } + + pgdat = NODE_DATA(nid); + if (!pgdat) { + printk(KERN_ERR + "Can't online cpu %d due to NULL pgdat\n", cpu); + return -ENOMEM; + } + + if (pgdat->node_zonelists->_zonerefs->zone == NULL) { + mutex_lock(&zonelists_mutex); + build_all_zonelists(NULL); + mutex_unlock(&zonelists_mutex); + } +#endif + + cpu_maps_update_begin(); + + if (cpu_hotplug_disabled) { + err = -EBUSY; + goto out; + } + + err = _cpu_up(cpu, 0); + +out: + cpu_maps_update_done(); + return err; +} +EXPORT_SYMBOL_GPL(cpu_up); + +#ifdef CONFIG_PM_SLEEP_SMP +static cpumask_var_t frozen_cpus; + +void __weak arch_disable_nonboot_cpus_begin(void) +{ +} + +void __weak 
arch_disable_nonboot_cpus_end(void) +{ +} + +int disable_nonboot_cpus(void) +{ + int cpu, first_cpu, error = 0; + + cpu_maps_update_begin(); + first_cpu = cpumask_first(cpu_online_mask); + /* + * We take down all of the non-boot CPUs in one shot to avoid races + * with the userspace trying to use the CPU hotplug at the same time + */ + cpumask_clear(frozen_cpus); + arch_disable_nonboot_cpus_begin(); + + printk("Disabling non-boot CPUs ...\n"); + for_each_online_cpu(cpu) { + if (cpu == first_cpu) + continue; + error = _cpu_down(cpu, 1); + if (!error) + cpumask_set_cpu(cpu, frozen_cpus); + else { + printk(KERN_ERR "Error taking CPU%d down: %d\n", + cpu, error); + break; + } + } + + arch_disable_nonboot_cpus_end(); + + if (!error) { + BUG_ON(num_online_cpus() > 1); + /* Make sure the CPUs won't be enabled by someone else */ + cpu_hotplug_disabled = 1; + } else { + printk(KERN_ERR "Non-boot CPUs are not disabled\n"); + } + cpu_maps_update_done(); + return error; +} + +void __weak arch_enable_nonboot_cpus_begin(void) +{ +} + +void __weak arch_enable_nonboot_cpus_end(void) +{ +} + +void __ref enable_nonboot_cpus(void) +{ + int cpu, error; + + /* Allow everyone to use the CPU hotplug again */ + cpu_maps_update_begin(); + cpu_hotplug_disabled = 0; + if (cpumask_empty(frozen_cpus)) + goto out; + + printk(KERN_INFO "Enabling non-boot CPUs ...\n"); + + arch_enable_nonboot_cpus_begin(); + + for_each_cpu(cpu, frozen_cpus) { + error = _cpu_up(cpu, 1); + if (!error) { + printk(KERN_INFO "CPU%d is up\n", cpu); + continue; + } + printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error); + } + + arch_enable_nonboot_cpus_end(); + + cpumask_clear(frozen_cpus); +out: + cpu_maps_update_done(); +} + +static int __init alloc_frozen_cpus(void) +{ + if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO)) + return -ENOMEM; + return 0; +} +core_initcall(alloc_frozen_cpus); + +/* + * Prevent regular CPU hotplug from racing with the freezer, by disabling CPU + * hotplug when tasks are 
about to be frozen. Also, don't allow the freezer + * to continue until any currently running CPU hotplug operation gets + * completed. + * To modify the 'cpu_hotplug_disabled' flag, we need to acquire the + * 'cpu_add_remove_lock'. And this same lock is also taken by the regular + * CPU hotplug path and released only after it is complete. Thus, we + * (and hence the freezer) will block here until any currently running CPU + * hotplug operation gets completed. + */ +void cpu_hotplug_disable_before_freeze(void) +{ + cpu_maps_update_begin(); + cpu_hotplug_disabled = 1; + cpu_maps_update_done(); +} + + +/* + * When tasks have been thawed, re-enable regular CPU hotplug (which had been + * disabled while beginning to freeze tasks). + */ +void cpu_hotplug_enable_after_thaw(void) +{ + cpu_maps_update_begin(); + cpu_hotplug_disabled = 0; + cpu_maps_update_done(); +} + +/* + * When callbacks for CPU hotplug notifications are being executed, we must + * ensure that the state of the system with respect to the tasks being frozen + * or not, as reported by the notification, remains unchanged *throughout the + * duration* of the execution of the callbacks. + * Hence we need to prevent the freezer from racing with regular CPU hotplug. + * + * This synchronization is implemented by mutually excluding regular CPU + * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/ + * Hibernate notifications. 
+ */ +static int +cpu_hotplug_pm_callback(struct notifier_block *nb, + unsigned long action, void *ptr) +{ + switch (action) { + + case PM_SUSPEND_PREPARE: + case PM_HIBERNATION_PREPARE: + cpu_hotplug_disable_before_freeze(); + break; + + case PM_POST_SUSPEND: + case PM_POST_HIBERNATION: + cpu_hotplug_enable_after_thaw(); + break; + + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + + +static int __init cpu_hotplug_pm_sync_init(void) +{ + pm_notifier(cpu_hotplug_pm_callback, 0); + return 0; +} +core_initcall(cpu_hotplug_pm_sync_init); + +#endif /* CONFIG_PM_SLEEP_SMP */ + +/** + * notify_cpu_starting(cpu) - call the CPU_STARTING notifiers + * @cpu: cpu that just started + * + * This function calls the cpu_chain notifiers with CPU_STARTING. + * It must be called by the arch code on the new cpu, before the new cpu + * enables interrupts and before the "boot" cpu returns from __cpu_up(). + */ +void __cpuinit notify_cpu_starting(unsigned int cpu) +{ + unsigned long val = CPU_STARTING; + +#ifdef CONFIG_PM_SLEEP_SMP + if (frozen_cpus != NULL && cpumask_test_cpu(cpu, frozen_cpus)) + val = CPU_STARTING_FROZEN; +#endif /* CONFIG_PM_SLEEP_SMP */ + cpu_notify(val, (void *)(long)cpu); +} + +#endif /* CONFIG_SMP */ + +/* + * cpu_bit_bitmap[] is a special, "compressed" data structure that + * represents all NR_CPUS bits binary values of 1< 32 + MASK_DECLARE_8(32), MASK_DECLARE_8(40), + MASK_DECLARE_8(48), MASK_DECLARE_8(56), +#endif +}; +EXPORT_SYMBOL_GPL(cpu_bit_bitmap); + +const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL; +EXPORT_SYMBOL(cpu_all_bits); + +#ifdef CONFIG_INIT_ALL_POSSIBLE +static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly + = CPU_BITS_ALL; +#else +static DECLARE_BITMAP(cpu_possible_bits, CONFIG_NR_CPUS) __read_mostly; +#endif +const struct cpumask *const cpu_possible_mask = to_cpumask(cpu_possible_bits); +EXPORT_SYMBOL(cpu_possible_mask); + +static DECLARE_BITMAP(cpu_online_bits, CONFIG_NR_CPUS) __read_mostly; +const 
struct cpumask *const cpu_online_mask = to_cpumask(cpu_online_bits); +EXPORT_SYMBOL(cpu_online_mask); + +static DECLARE_BITMAP(cpu_present_bits, CONFIG_NR_CPUS) __read_mostly; +const struct cpumask *const cpu_present_mask = to_cpumask(cpu_present_bits); +EXPORT_SYMBOL(cpu_present_mask); + +static DECLARE_BITMAP(cpu_active_bits, CONFIG_NR_CPUS) __read_mostly; +const struct cpumask *const cpu_active_mask = to_cpumask(cpu_active_bits); +EXPORT_SYMBOL(cpu_active_mask); + +void set_cpu_possible(unsigned int cpu, bool possible) +{ + if (possible) + cpumask_set_cpu(cpu, to_cpumask(cpu_possible_bits)); + else + cpumask_clear_cpu(cpu, to_cpumask(cpu_possible_bits)); +} + +void set_cpu_present(unsigned int cpu, bool present) +{ + if (present) + cpumask_set_cpu(cpu, to_cpumask(cpu_present_bits)); + else + cpumask_clear_cpu(cpu, to_cpumask(cpu_present_bits)); +} + +void set_cpu_online(unsigned int cpu, bool online) +{ + if (online) + cpumask_set_cpu(cpu, to_cpumask(cpu_online_bits)); + else + cpumask_clear_cpu(cpu, to_cpumask(cpu_online_bits)); +} + +void set_cpu_active(unsigned int cpu, bool active) +{ + if (active) + cpumask_set_cpu(cpu, to_cpumask(cpu_active_bits)); + else + cpumask_clear_cpu(cpu, to_cpumask(cpu_active_bits)); +} + +void init_cpu_present(const struct cpumask *src) +{ + cpumask_copy(to_cpumask(cpu_present_bits), src); +} + +void init_cpu_possible(const struct cpumask *src) +{ + cpumask_copy(to_cpumask(cpu_possible_bits), src); +} + +void init_cpu_online(const struct cpumask *src) +{ + cpumask_copy(to_cpumask(cpu_online_bits), src); +} diff --git a/test/fixtures/c/diff.c b/test/fixtures/c/diff.c new file mode 100644 index 00000000..90baa958 --- /dev/null +++ b/test/fixtures/c/diff.c @@ -0,0 +1,784 @@ +/* + * Copyright (C) 2012 the libgit2 contributors + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. 
+ */ +#include "common.h" +#include "git2/diff.h" +#include "diff.h" +#include "fileops.h" +#include "config.h" +#include "attr_file.h" + +static char *diff_prefix_from_pathspec(const git_strarray *pathspec) +{ + git_buf prefix = GIT_BUF_INIT; + const char *scan; + + if (git_buf_common_prefix(&prefix, pathspec) < 0) + return NULL; + + /* diff prefix will only be leading non-wildcards */ + for (scan = prefix.ptr; *scan && !git__iswildcard(*scan); ++scan); + git_buf_truncate(&prefix, scan - prefix.ptr); + + if (prefix.size > 0) + return git_buf_detach(&prefix); + + git_buf_free(&prefix); + return NULL; +} + +static bool diff_pathspec_is_interesting(const git_strarray *pathspec) +{ + const char *str; + + if (pathspec == NULL || pathspec->count == 0) + return false; + if (pathspec->count > 1) + return true; + + str = pathspec->strings[0]; + if (!str || !str[0] || (!str[1] && (str[0] == '*' || str[0] == '.'))) + return false; + return true; +} + +static bool diff_path_matches_pathspec(git_diff_list *diff, const char *path) +{ + unsigned int i; + git_attr_fnmatch *match; + + if (!diff->pathspec.length) + return true; + + git_vector_foreach(&diff->pathspec, i, match) { + int result = p_fnmatch(match->pattern, path, 0); + + /* if we didn't match, look for exact dirname prefix match */ + if (result == FNM_NOMATCH && + (match->flags & GIT_ATTR_FNMATCH_HASWILD) == 0 && + strncmp(path, match->pattern, match->length) == 0 && + path[match->length] == '/') + result = 0; + + if (result == 0) + return (match->flags & GIT_ATTR_FNMATCH_NEGATIVE) ? 
false : true; + } + + return false; +} + +static git_diff_delta *diff_delta__alloc( + git_diff_list *diff, + git_delta_t status, + const char *path) +{ + git_diff_delta *delta = git__calloc(1, sizeof(git_diff_delta)); + if (!delta) + return NULL; + + delta->old_file.path = git_pool_strdup(&diff->pool, path); + if (delta->old_file.path == NULL) { + git__free(delta); + return NULL; + } + + delta->new_file.path = delta->old_file.path; + + if (diff->opts.flags & GIT_DIFF_REVERSE) { + switch (status) { + case GIT_DELTA_ADDED: status = GIT_DELTA_DELETED; break; + case GIT_DELTA_DELETED: status = GIT_DELTA_ADDED; break; + default: break; /* leave other status values alone */ + } + } + delta->status = status; + + return delta; +} + +static git_diff_delta *diff_delta__dup( + const git_diff_delta *d, git_pool *pool) +{ + git_diff_delta *delta = git__malloc(sizeof(git_diff_delta)); + if (!delta) + return NULL; + + memcpy(delta, d, sizeof(git_diff_delta)); + + delta->old_file.path = git_pool_strdup(pool, d->old_file.path); + if (delta->old_file.path == NULL) + goto fail; + + if (d->new_file.path != d->old_file.path) { + delta->new_file.path = git_pool_strdup(pool, d->new_file.path); + if (delta->new_file.path == NULL) + goto fail; + } else { + delta->new_file.path = delta->old_file.path; + } + + return delta; + +fail: + git__free(delta); + return NULL; +} + +static git_diff_delta *diff_delta__merge_like_cgit( + const git_diff_delta *a, const git_diff_delta *b, git_pool *pool) +{ + git_diff_delta *dup = diff_delta__dup(a, pool); + if (!dup) + return NULL; + + if (git_oid_cmp(&dup->new_file.oid, &b->new_file.oid) == 0) + return dup; + + git_oid_cpy(&dup->new_file.oid, &b->new_file.oid); + + dup->new_file.mode = b->new_file.mode; + dup->new_file.size = b->new_file.size; + dup->new_file.flags = b->new_file.flags; + + /* Emulate C git for merging two diffs (a la 'git diff '). 
+ * + * When C git does a diff between the work dir and a tree, it actually + * diffs with the index but uses the workdir contents. This emulates + * those choices so we can emulate the type of diff. + */ + if (git_oid_cmp(&dup->old_file.oid, &dup->new_file.oid) == 0) { + if (dup->status == GIT_DELTA_DELETED) + /* preserve pending delete info */; + else if (b->status == GIT_DELTA_UNTRACKED || + b->status == GIT_DELTA_IGNORED) + dup->status = b->status; + else + dup->status = GIT_DELTA_UNMODIFIED; + } + else if (dup->status == GIT_DELTA_UNMODIFIED || + b->status == GIT_DELTA_DELETED) + dup->status = b->status; + + return dup; +} + +static int diff_delta__from_one( + git_diff_list *diff, + git_delta_t status, + const git_index_entry *entry) +{ + git_diff_delta *delta; + + if (status == GIT_DELTA_IGNORED && + (diff->opts.flags & GIT_DIFF_INCLUDE_IGNORED) == 0) + return 0; + + if (status == GIT_DELTA_UNTRACKED && + (diff->opts.flags & GIT_DIFF_INCLUDE_UNTRACKED) == 0) + return 0; + + if (!diff_path_matches_pathspec(diff, entry->path)) + return 0; + + delta = diff_delta__alloc(diff, status, entry->path); + GITERR_CHECK_ALLOC(delta); + + /* This fn is just for single-sided diffs */ + assert(status != GIT_DELTA_MODIFIED); + + if (delta->status == GIT_DELTA_DELETED) { + delta->old_file.mode = entry->mode; + delta->old_file.size = entry->file_size; + git_oid_cpy(&delta->old_file.oid, &entry->oid); + } else /* ADDED, IGNORED, UNTRACKED */ { + delta->new_file.mode = entry->mode; + delta->new_file.size = entry->file_size; + git_oid_cpy(&delta->new_file.oid, &entry->oid); + } + + delta->old_file.flags |= GIT_DIFF_FILE_VALID_OID; + delta->new_file.flags |= GIT_DIFF_FILE_VALID_OID; + + if (git_vector_insert(&diff->deltas, delta) < 0) { + git__free(delta); + return -1; + } + + return 0; +} + +static int diff_delta__from_two( + git_diff_list *diff, + git_delta_t status, + const git_index_entry *old_entry, + const git_index_entry *new_entry, + git_oid *new_oid) +{ + git_diff_delta 
*delta; + + if (status == GIT_DELTA_UNMODIFIED && + (diff->opts.flags & GIT_DIFF_INCLUDE_UNMODIFIED) == 0) + return 0; + + if ((diff->opts.flags & GIT_DIFF_REVERSE) != 0) { + const git_index_entry *temp = old_entry; + old_entry = new_entry; + new_entry = temp; + } + + delta = diff_delta__alloc(diff, status, old_entry->path); + GITERR_CHECK_ALLOC(delta); + + delta->old_file.mode = old_entry->mode; + git_oid_cpy(&delta->old_file.oid, &old_entry->oid); + delta->old_file.flags |= GIT_DIFF_FILE_VALID_OID; + + delta->new_file.mode = new_entry->mode; + git_oid_cpy(&delta->new_file.oid, new_oid ? new_oid : &new_entry->oid); + if (new_oid || !git_oid_iszero(&new_entry->oid)) + delta->new_file.flags |= GIT_DIFF_FILE_VALID_OID; + + if (git_vector_insert(&diff->deltas, delta) < 0) { + git__free(delta); + return -1; + } + + return 0; +} + +static char *diff_strdup_prefix(git_pool *pool, const char *prefix) +{ + size_t len = strlen(prefix); + + /* append '/' at end if needed */ + if (len > 0 && prefix[len - 1] != '/') + return git_pool_strcat(pool, prefix, "/"); + else + return git_pool_strndup(pool, prefix, len + 1); +} + +static int diff_delta__cmp(const void *a, const void *b) +{ + const git_diff_delta *da = a, *db = b; + int val = strcmp(da->old_file.path, db->old_file.path); + return val ? 
val : ((int)da->status - (int)db->status); +} + +static int config_bool(git_config *cfg, const char *name, int defvalue) +{ + int val = defvalue; + + if (git_config_get_bool(&val, cfg, name) < 0) + giterr_clear(); + + return val; +} + +static git_diff_list *git_diff_list_alloc( + git_repository *repo, const git_diff_options *opts) +{ + git_config *cfg; + size_t i; + git_diff_list *diff = git__calloc(1, sizeof(git_diff_list)); + if (diff == NULL) + return NULL; + + diff->repo = repo; + + if (git_vector_init(&diff->deltas, 0, diff_delta__cmp) < 0 || + git_pool_init(&diff->pool, 1, 0) < 0) + goto fail; + + /* load config values that affect diff behavior */ + if (git_repository_config__weakptr(&cfg, repo) < 0) + goto fail; + if (config_bool(cfg, "core.symlinks", 1)) + diff->diffcaps = diff->diffcaps | GIT_DIFFCAPS_HAS_SYMLINKS; + if (config_bool(cfg, "core.ignorestat", 0)) + diff->diffcaps = diff->diffcaps | GIT_DIFFCAPS_ASSUME_UNCHANGED; + if (config_bool(cfg, "core.filemode", 1)) + diff->diffcaps = diff->diffcaps | GIT_DIFFCAPS_TRUST_EXEC_BIT; + if (config_bool(cfg, "core.trustctime", 1)) + diff->diffcaps = diff->diffcaps | GIT_DIFFCAPS_TRUST_CTIME; + /* Don't set GIT_DIFFCAPS_USE_DEV - compile time option in core git */ + + if (opts == NULL) + return diff; + + memcpy(&diff->opts, opts, sizeof(git_diff_options)); + memset(&diff->opts.pathspec, 0, sizeof(diff->opts.pathspec)); + + diff->opts.old_prefix = diff_strdup_prefix(&diff->pool, + opts->old_prefix ? opts->old_prefix : DIFF_OLD_PREFIX_DEFAULT); + diff->opts.new_prefix = diff_strdup_prefix(&diff->pool, + opts->new_prefix ? 
opts->new_prefix : DIFF_NEW_PREFIX_DEFAULT); + + if (!diff->opts.old_prefix || !diff->opts.new_prefix) + goto fail; + + if (diff->opts.flags & GIT_DIFF_REVERSE) { + char *swap = diff->opts.old_prefix; + diff->opts.old_prefix = diff->opts.new_prefix; + diff->opts.new_prefix = swap; + } + + /* only copy pathspec if it is "interesting" so we can test + * diff->pathspec.length > 0 to know if it is worth calling + * fnmatch as we iterate. + */ + if (!diff_pathspec_is_interesting(&opts->pathspec)) + return diff; + + if (git_vector_init( + &diff->pathspec, (unsigned int)opts->pathspec.count, NULL) < 0) + goto fail; + + for (i = 0; i < opts->pathspec.count; ++i) { + int ret; + const char *pattern = opts->pathspec.strings[i]; + git_attr_fnmatch *match = git__calloc(1, sizeof(git_attr_fnmatch)); + if (!match) + goto fail; + match->flags = GIT_ATTR_FNMATCH_ALLOWSPACE; + ret = git_attr_fnmatch__parse(match, &diff->pool, NULL, &pattern); + if (ret == GIT_ENOTFOUND) { + git__free(match); + continue; + } else if (ret < 0) + goto fail; + + if (git_vector_insert(&diff->pathspec, match) < 0) + goto fail; + } + + return diff; + +fail: + git_diff_list_free(diff); + return NULL; +} + +void git_diff_list_free(git_diff_list *diff) +{ + git_diff_delta *delta; + git_attr_fnmatch *match; + unsigned int i; + + if (!diff) + return; + + git_vector_foreach(&diff->deltas, i, delta) { + git__free(delta); + diff->deltas.contents[i] = NULL; + } + git_vector_free(&diff->deltas); + + git_vector_foreach(&diff->pathspec, i, match) { + git__free(match); + diff->pathspec.contents[i] = NULL; + } + git_vector_free(&diff->pathspec); + + git_pool_clear(&diff->pool); + git__free(diff); +} + +static int oid_for_workdir_item( + git_repository *repo, + const git_index_entry *item, + git_oid *oid) +{ + int result; + git_buf full_path = GIT_BUF_INIT; + + if (git_buf_joinpath(&full_path, git_repository_workdir(repo), item->path) < 0) + return -1; + + /* calculate OID for file if possible*/ + if 
(S_ISLNK(item->mode)) + result = git_odb__hashlink(oid, full_path.ptr); + else if (!git__is_sizet(item->file_size)) { + giterr_set(GITERR_OS, "File size overflow for 32-bit systems"); + result = -1; + } else { + int fd = git_futils_open_ro(full_path.ptr); + if (fd < 0) + result = fd; + else { + result = git_odb__hashfd( + oid, fd, (size_t)item->file_size, GIT_OBJ_BLOB); + p_close(fd); + } + } + + git_buf_free(&full_path); + + return result; +} + +#define EXEC_BIT_MASK 0000111 + +static int maybe_modified( + git_iterator *old_iter, + const git_index_entry *oitem, + git_iterator *new_iter, + const git_index_entry *nitem, + git_diff_list *diff) +{ + git_oid noid, *use_noid = NULL; + git_delta_t status = GIT_DELTA_MODIFIED; + unsigned int omode = oitem->mode; + unsigned int nmode = nitem->mode; + + GIT_UNUSED(old_iter); + + if (!diff_path_matches_pathspec(diff, oitem->path)) + return 0; + + /* on platforms with no symlinks, promote plain files to symlinks */ + if (S_ISLNK(omode) && S_ISREG(nmode) && + !(diff->diffcaps & GIT_DIFFCAPS_HAS_SYMLINKS)) + nmode = GIT_MODE_TYPE(omode) | (nmode & GIT_MODE_PERMS_MASK); + + /* on platforms with no execmode, clear exec bit from comparisons */ + if (!(diff->diffcaps & GIT_DIFFCAPS_TRUST_EXEC_BIT)) { + omode = omode & ~EXEC_BIT_MASK; + nmode = nmode & ~EXEC_BIT_MASK; + } + + /* support "assume unchanged" (badly, b/c we still stat everything) */ + if ((diff->diffcaps & GIT_DIFFCAPS_ASSUME_UNCHANGED) != 0) + status = (oitem->flags_extended & GIT_IDXENTRY_INTENT_TO_ADD) ? 
+ GIT_DELTA_MODIFIED : GIT_DELTA_UNMODIFIED; + + /* support "skip worktree" index bit */ + else if ((oitem->flags_extended & GIT_IDXENTRY_SKIP_WORKTREE) != 0) + status = GIT_DELTA_UNMODIFIED; + + /* if basic type of file changed, then split into delete and add */ + else if (GIT_MODE_TYPE(omode) != GIT_MODE_TYPE(nmode)) { + if (diff_delta__from_one(diff, GIT_DELTA_DELETED, oitem) < 0 || + diff_delta__from_one(diff, GIT_DELTA_ADDED, nitem) < 0) + return -1; + return 0; + } + + /* if oids and modes match, then file is unmodified */ + else if (git_oid_cmp(&oitem->oid, &nitem->oid) == 0 && + omode == nmode) + status = GIT_DELTA_UNMODIFIED; + + /* if we have a workdir item with an unknown oid, check deeper */ + else if (git_oid_iszero(&nitem->oid) && new_iter->type == GIT_ITERATOR_WORKDIR) { + /* TODO: add check against index file st_mtime to avoid racy-git */ + + /* if they files look exactly alike, then we'll assume the same */ + if (oitem->file_size == nitem->file_size && + (!(diff->diffcaps & GIT_DIFFCAPS_TRUST_CTIME) || + (oitem->ctime.seconds == nitem->ctime.seconds)) && + oitem->mtime.seconds == nitem->mtime.seconds && + (!(diff->diffcaps & GIT_DIFFCAPS_USE_DEV) || + (oitem->dev == nitem->dev)) && + oitem->ino == nitem->ino && + oitem->uid == nitem->uid && + oitem->gid == nitem->gid) + status = GIT_DELTA_UNMODIFIED; + + else if (S_ISGITLINK(nmode)) { + git_submodule *sub; + + if ((diff->opts.flags & GIT_DIFF_IGNORE_SUBMODULES) != 0) + status = GIT_DELTA_UNMODIFIED; + else if (git_submodule_lookup(&sub, diff->repo, nitem->path) < 0) + return -1; + else if (sub->ignore == GIT_SUBMODULE_IGNORE_ALL) + status = GIT_DELTA_UNMODIFIED; + else { + /* TODO: support other GIT_SUBMODULE_IGNORE values */ + status = GIT_DELTA_UNMODIFIED; + } + } + + /* TODO: check git attributes so we will not have to read the file + * in if it is marked binary. 
+ */ + + else if (oid_for_workdir_item(diff->repo, nitem, &noid) < 0) + return -1; + + else if (git_oid_cmp(&oitem->oid, &noid) == 0 && + omode == nmode) + status = GIT_DELTA_UNMODIFIED; + + /* store calculated oid so we don't have to recalc later */ + use_noid = &noid; + } + + return diff_delta__from_two(diff, status, oitem, nitem, use_noid); +} + +static int diff_from_iterators( + git_repository *repo, + const git_diff_options *opts, /**< can be NULL for defaults */ + git_iterator *old_iter, + git_iterator *new_iter, + git_diff_list **diff_ptr) +{ + const git_index_entry *oitem, *nitem; + git_buf ignore_prefix = GIT_BUF_INIT; + git_diff_list *diff = git_diff_list_alloc(repo, opts); + if (!diff) + goto fail; + + diff->old_src = old_iter->type; + diff->new_src = new_iter->type; + + if (git_iterator_current(old_iter, &oitem) < 0 || + git_iterator_current(new_iter, &nitem) < 0) + goto fail; + + /* run iterators building diffs */ + while (oitem || nitem) { + + /* create DELETED records for old items not matched in new */ + if (oitem && (!nitem || strcmp(oitem->path, nitem->path) < 0)) { + if (diff_delta__from_one(diff, GIT_DELTA_DELETED, oitem) < 0 || + git_iterator_advance(old_iter, &oitem) < 0) + goto fail; + } + + /* create ADDED, TRACKED, or IGNORED records for new items not + * matched in old (and/or descend into directories as needed) + */ + else if (nitem && (!oitem || strcmp(oitem->path, nitem->path) > 0)) { + git_delta_t delta_type = GIT_DELTA_UNTRACKED; + + /* check if contained in ignored parent directory */ + if (git_buf_len(&ignore_prefix) && + git__prefixcmp(nitem->path, git_buf_cstr(&ignore_prefix)) == 0) + delta_type = GIT_DELTA_IGNORED; + + if (S_ISDIR(nitem->mode)) { + /* recurse into directory only if there are tracked items in + * it or if the user requested the contents of untracked + * directories and it is not under an ignored directory. 
+ */ + if ((oitem && git__prefixcmp(oitem->path, nitem->path) == 0) || + (delta_type == GIT_DELTA_UNTRACKED && + (diff->opts.flags & GIT_DIFF_RECURSE_UNTRACKED_DIRS) != 0)) + { + /* if this directory is ignored, remember it as the + * "ignore_prefix" for processing contained items + */ + if (delta_type == GIT_DELTA_UNTRACKED && + git_iterator_current_is_ignored(new_iter)) + git_buf_sets(&ignore_prefix, nitem->path); + + if (git_iterator_advance_into_directory(new_iter, &nitem) < 0) + goto fail; + + continue; + } + } + + /* In core git, the next two "else if" clauses are effectively + * reversed -- i.e. when an untracked file contained in an + * ignored directory is individually ignored, it shows up as an + * ignored file in the diff list, even though other untracked + * files in the same directory are skipped completely. + * + * To me, this is odd. If the directory is ignored and the file + * is untracked, we should skip it consistently, regardless of + * whether it happens to match a pattern in the ignore file. + * + * To match the core git behavior, just reverse the following + * two "else if" cases so that individual file ignores are + * checked before container directory exclusions are used to + * skip the file. 
+ */ + else if (delta_type == GIT_DELTA_IGNORED) { + if (git_iterator_advance(new_iter, &nitem) < 0) + goto fail; + continue; /* ignored parent directory, so skip completely */ + } + + else if (git_iterator_current_is_ignored(new_iter)) + delta_type = GIT_DELTA_IGNORED; + + else if (new_iter->type != GIT_ITERATOR_WORKDIR) + delta_type = GIT_DELTA_ADDED; + + if (diff_delta__from_one(diff, delta_type, nitem) < 0 || + git_iterator_advance(new_iter, &nitem) < 0) + goto fail; + } + + /* otherwise item paths match, so create MODIFIED record + * (or ADDED and DELETED pair if type changed) + */ + else { + assert(oitem && nitem && strcmp(oitem->path, nitem->path) == 0); + + if (maybe_modified(old_iter, oitem, new_iter, nitem, diff) < 0 || + git_iterator_advance(old_iter, &oitem) < 0 || + git_iterator_advance(new_iter, &nitem) < 0) + goto fail; + } + } + + git_iterator_free(old_iter); + git_iterator_free(new_iter); + git_buf_free(&ignore_prefix); + + *diff_ptr = diff; + return 0; + +fail: + git_iterator_free(old_iter); + git_iterator_free(new_iter); + git_buf_free(&ignore_prefix); + + git_diff_list_free(diff); + *diff_ptr = NULL; + return -1; +} + + +int git_diff_tree_to_tree( + git_repository *repo, + const git_diff_options *opts, /**< can be NULL for defaults */ + git_tree *old_tree, + git_tree *new_tree, + git_diff_list **diff) +{ + git_iterator *a = NULL, *b = NULL; + char *prefix = opts ? diff_prefix_from_pathspec(&opts->pathspec) : NULL; + + assert(repo && old_tree && new_tree && diff); + + if (git_iterator_for_tree_range(&a, repo, old_tree, prefix, prefix) < 0 || + git_iterator_for_tree_range(&b, repo, new_tree, prefix, prefix) < 0) + return -1; + + git__free(prefix); + + return diff_from_iterators(repo, opts, a, b, diff); +} + +int git_diff_index_to_tree( + git_repository *repo, + const git_diff_options *opts, + git_tree *old_tree, + git_diff_list **diff) +{ + git_iterator *a = NULL, *b = NULL; + char *prefix = opts ? 
diff_prefix_from_pathspec(&opts->pathspec) : NULL; + + assert(repo && diff); + + if (git_iterator_for_tree_range(&a, repo, old_tree, prefix, prefix) < 0 || + git_iterator_for_index_range(&b, repo, prefix, prefix) < 0) + return -1; + + git__free(prefix); + + return diff_from_iterators(repo, opts, a, b, diff); +} + +int git_diff_workdir_to_index( + git_repository *repo, + const git_diff_options *opts, + git_diff_list **diff) +{ + git_iterator *a = NULL, *b = NULL; + char *prefix = opts ? diff_prefix_from_pathspec(&opts->pathspec) : NULL; + + assert(repo && diff); + + if (git_iterator_for_index_range(&a, repo, prefix, prefix) < 0 || + git_iterator_for_workdir_range(&b, repo, prefix, prefix) < 0) + return -1; + + git__free(prefix); + + return diff_from_iterators(repo, opts, a, b, diff); +} + + +int git_diff_workdir_to_tree( + git_repository *repo, + const git_diff_options *opts, + git_tree *old_tree, + git_diff_list **diff) +{ + git_iterator *a = NULL, *b = NULL; + char *prefix = opts ? diff_prefix_from_pathspec(&opts->pathspec) : NULL; + + assert(repo && old_tree && diff); + + if (git_iterator_for_tree_range(&a, repo, old_tree, prefix, prefix) < 0 || + git_iterator_for_workdir_range(&b, repo, prefix, prefix) < 0) + return -1; + + git__free(prefix); + + return diff_from_iterators(repo, opts, a, b, diff); +} + +int git_diff_merge( + git_diff_list *onto, + const git_diff_list *from) +{ + int error = 0; + git_pool onto_pool; + git_vector onto_new; + git_diff_delta *delta; + unsigned int i, j; + + assert(onto && from); + + if (!from->deltas.length) + return 0; + + if (git_vector_init(&onto_new, onto->deltas.length, diff_delta__cmp) < 0 || + git_pool_init(&onto_pool, 1, 0) < 0) + return -1; + + for (i = 0, j = 0; i < onto->deltas.length || j < from->deltas.length; ) { + git_diff_delta *o = GIT_VECTOR_GET(&onto->deltas, i); + const git_diff_delta *f = GIT_VECTOR_GET(&from->deltas, j); + int cmp = !f ? -1 : !o ? 
1 : strcmp(o->old_file.path, f->old_file.path); + + if (cmp < 0) { + delta = diff_delta__dup(o, &onto_pool); + i++; + } else if (cmp > 0) { + delta = diff_delta__dup(f, &onto_pool); + j++; + } else { + delta = diff_delta__merge_like_cgit(o, f, &onto_pool); + i++; + j++; + } + + if ((error = !delta ? -1 : git_vector_insert(&onto_new, delta)) < 0) + break; + } + + if (!error) { + git_vector_swap(&onto->deltas, &onto_new); + git_pool_swap(&onto->pool, &onto_pool); + onto->new_src = from->new_src; + } + + git_vector_foreach(&onto_new, i, delta) + git__free(delta); + git_vector_free(&onto_new); + git_pool_clear(&onto_pool); + + return error; +} + diff --git a/test/fixtures/c/git.c b/test/fixtures/c/git.c new file mode 100644 index 00000000..d232de92 --- /dev/null +++ b/test/fixtures/c/git.c @@ -0,0 +1,609 @@ +#include "builtin.h" +#include "cache.h" +#include "exec_cmd.h" +#include "help.h" +#include "quote.h" +#include "run-command.h" + +const char git_usage_string[] = + "git [--version] [--exec-path[=]] [--html-path] [--man-path] [--info-path]\n" + " [-p|--paginate|--no-pager] [--no-replace-objects] [--bare]\n" + " [--git-dir=] [--work-tree=] [--namespace=]\n" + " [-c name=value] [--help]\n" + " []"; + +const char git_more_info_string[] = + N_("See 'git help ' for more information on a specific command."); + +static struct startup_info git_startup_info; +static int use_pager = -1; +struct pager_config { + const char *cmd; + int want; + char *value; +}; + +static int pager_command_config(const char *var, const char *value, void *data) +{ + struct pager_config *c = data; + if (!prefixcmp(var, "pager.") && !strcmp(var + 6, c->cmd)) { + int b = git_config_maybe_bool(var, value); + if (b >= 0) + c->want = b; + else { + c->want = 1; + c->value = xstrdup(value); + } + } + return 0; +} + +/* returns 0 for "no pager", 1 for "use pager", and -1 for "not specified" */ +int check_pager_config(const char *cmd) +{ + struct pager_config c; + c.cmd = cmd; + c.want = -1; + c.value = 
NULL; + git_config(pager_command_config, &c); + if (c.value) + pager_program = c.value; + return c.want; +} + +static void commit_pager_choice(void) { + switch (use_pager) { + case 0: + setenv("GIT_PAGER", "cat", 1); + break; + case 1: + setup_pager(); + break; + default: + break; + } +} + +static int handle_options(const char ***argv, int *argc, int *envchanged) +{ + const char **orig_argv = *argv; + + while (*argc > 0) { + const char *cmd = (*argv)[0]; + if (cmd[0] != '-') + break; + + /* + * For legacy reasons, the "version" and "help" + * commands can be written with "--" prepended + * to make them look like flags. + */ + if (!strcmp(cmd, "--help") || !strcmp(cmd, "--version")) + break; + + /* + * Check remaining flags. + */ + if (!prefixcmp(cmd, "--exec-path")) { + cmd += 11; + if (*cmd == '=') + git_set_argv_exec_path(cmd + 1); + else { + puts(git_exec_path()); + exit(0); + } + } else if (!strcmp(cmd, "--html-path")) { + puts(system_path(GIT_HTML_PATH)); + exit(0); + } else if (!strcmp(cmd, "--man-path")) { + puts(system_path(GIT_MAN_PATH)); + exit(0); + } else if (!strcmp(cmd, "--info-path")) { + puts(system_path(GIT_INFO_PATH)); + exit(0); + } else if (!strcmp(cmd, "-p") || !strcmp(cmd, "--paginate")) { + use_pager = 1; + } else if (!strcmp(cmd, "--no-pager")) { + use_pager = 0; + if (envchanged) + *envchanged = 1; + } else if (!strcmp(cmd, "--no-replace-objects")) { + read_replace_refs = 0; + setenv(NO_REPLACE_OBJECTS_ENVIRONMENT, "1", 1); + if (envchanged) + *envchanged = 1; + } else if (!strcmp(cmd, "--git-dir")) { + if (*argc < 2) { + fprintf(stderr, "No directory given for --git-dir.\n" ); + usage(git_usage_string); + } + setenv(GIT_DIR_ENVIRONMENT, (*argv)[1], 1); + if (envchanged) + *envchanged = 1; + (*argv)++; + (*argc)--; + } else if (!prefixcmp(cmd, "--git-dir=")) { + setenv(GIT_DIR_ENVIRONMENT, cmd + 10, 1); + if (envchanged) + *envchanged = 1; + } else if (!strcmp(cmd, "--namespace")) { + if (*argc < 2) { + fprintf(stderr, "No namespace given 
for --namespace.\n" ); + usage(git_usage_string); + } + setenv(GIT_NAMESPACE_ENVIRONMENT, (*argv)[1], 1); + if (envchanged) + *envchanged = 1; + (*argv)++; + (*argc)--; + } else if (!prefixcmp(cmd, "--namespace=")) { + setenv(GIT_NAMESPACE_ENVIRONMENT, cmd + 12, 1); + if (envchanged) + *envchanged = 1; + } else if (!strcmp(cmd, "--work-tree")) { + if (*argc < 2) { + fprintf(stderr, "No directory given for --work-tree.\n" ); + usage(git_usage_string); + } + setenv(GIT_WORK_TREE_ENVIRONMENT, (*argv)[1], 1); + if (envchanged) + *envchanged = 1; + (*argv)++; + (*argc)--; + } else if (!prefixcmp(cmd, "--work-tree=")) { + setenv(GIT_WORK_TREE_ENVIRONMENT, cmd + 12, 1); + if (envchanged) + *envchanged = 1; + } else if (!strcmp(cmd, "--bare")) { + static char git_dir[PATH_MAX+1]; + is_bare_repository_cfg = 1; + setenv(GIT_DIR_ENVIRONMENT, getcwd(git_dir, sizeof(git_dir)), 0); + if (envchanged) + *envchanged = 1; + } else if (!strcmp(cmd, "-c")) { + if (*argc < 2) { + fprintf(stderr, "-c expects a configuration string\n" ); + usage(git_usage_string); + } + git_config_push_parameter((*argv)[1]); + (*argv)++; + (*argc)--; + } else { + fprintf(stderr, "Unknown option: %s\n", cmd); + usage(git_usage_string); + } + + (*argv)++; + (*argc)--; + } + return (*argv) - orig_argv; +} + +static int handle_alias(int *argcp, const char ***argv) +{ + int envchanged = 0, ret = 0, saved_errno = errno; + const char *subdir; + int count, option_count; + const char **new_argv; + const char *alias_command; + char *alias_string; + int unused_nongit; + + subdir = setup_git_directory_gently(&unused_nongit); + + alias_command = (*argv)[0]; + alias_string = alias_lookup(alias_command); + if (alias_string) { + if (alias_string[0] == '!') { + const char **alias_argv; + int argc = *argcp, i; + + commit_pager_choice(); + + /* build alias_argv */ + alias_argv = xmalloc(sizeof(*alias_argv) * (argc + 1)); + alias_argv[0] = alias_string + 1; + for (i = 1; i < argc; ++i) + alias_argv[i] = (*argv)[i]; + 
alias_argv[argc] = NULL; + + ret = run_command_v_opt(alias_argv, RUN_USING_SHELL); + if (ret >= 0) /* normal exit */ + exit(ret); + + die_errno("While expanding alias '%s': '%s'", + alias_command, alias_string + 1); + } + count = split_cmdline(alias_string, &new_argv); + if (count < 0) + die("Bad alias.%s string: %s", alias_command, + split_cmdline_strerror(count)); + option_count = handle_options(&new_argv, &count, &envchanged); + if (envchanged) + die("alias '%s' changes environment variables\n" + "You can use '!git' in the alias to do this.", + alias_command); + memmove(new_argv - option_count, new_argv, + count * sizeof(char *)); + new_argv -= option_count; + + if (count < 1) + die("empty alias for %s", alias_command); + + if (!strcmp(alias_command, new_argv[0])) + die("recursive alias: %s", alias_command); + + trace_argv_printf(new_argv, + "trace: alias expansion: %s =>", + alias_command); + + new_argv = xrealloc(new_argv, sizeof(char *) * + (count + *argcp)); + /* insert after command name */ + memcpy(new_argv + count, *argv + 1, sizeof(char *) * *argcp); + + *argv = new_argv; + *argcp += count - 1; + + ret = 1; + } + + if (subdir && chdir(subdir)) + die_errno("Cannot change to '%s'", subdir); + + errno = saved_errno; + + return ret; +} + +const char git_version_string[] = GIT_VERSION; + +#define RUN_SETUP (1<<0) +#define RUN_SETUP_GENTLY (1<<1) +#define USE_PAGER (1<<2) +/* + * require working tree to be present -- anything uses this needs + * RUN_SETUP for reading from the configuration file. 
+ */ +#define NEED_WORK_TREE (1<<3) + +struct cmd_struct { + const char *cmd; + int (*fn)(int, const char **, const char *); + int option; +}; + +static int run_builtin(struct cmd_struct *p, int argc, const char **argv) +{ + int status, help; + struct stat st; + const char *prefix; + + prefix = NULL; + help = argc == 2 && !strcmp(argv[1], "-h"); + if (!help) { + if (p->option & RUN_SETUP) + prefix = setup_git_directory(); + if (p->option & RUN_SETUP_GENTLY) { + int nongit_ok; + prefix = setup_git_directory_gently(&nongit_ok); + } + + if (use_pager == -1 && p->option & (RUN_SETUP | RUN_SETUP_GENTLY)) + use_pager = check_pager_config(p->cmd); + if (use_pager == -1 && p->option & USE_PAGER) + use_pager = 1; + + if ((p->option & (RUN_SETUP | RUN_SETUP_GENTLY)) && + startup_info->have_repository) /* get_git_dir() may set up repo, avoid that */ + trace_repo_setup(prefix); + } + commit_pager_choice(); + + if (!help && p->option & NEED_WORK_TREE) + setup_work_tree(); + + trace_argv_printf(argv, "trace: built-in: git"); + + status = p->fn(argc, argv, prefix); + if (status) + return status; + + /* Somebody closed stdout? */ + if (fstat(fileno(stdout), &st)) + return 0; + /* Ignore write errors for pipes and sockets.. */ + if (S_ISFIFO(st.st_mode) || S_ISSOCK(st.st_mode)) + return 0; + + /* Check for ENOSPC and EIO errors.. 
*/ + if (fflush(stdout)) + die_errno("write failure on standard output"); + if (ferror(stdout)) + die("unknown write failure on standard output"); + if (fclose(stdout)) + die_errno("close failed on standard output"); + return 0; +} + +static void handle_internal_command(int argc, const char **argv) +{ + const char *cmd = argv[0]; + static struct cmd_struct commands[] = { + { "add", cmd_add, RUN_SETUP | NEED_WORK_TREE }, + { "annotate", cmd_annotate, RUN_SETUP }, + { "apply", cmd_apply, RUN_SETUP_GENTLY }, + { "archive", cmd_archive }, + { "bisect--helper", cmd_bisect__helper, RUN_SETUP }, + { "blame", cmd_blame, RUN_SETUP }, + { "branch", cmd_branch, RUN_SETUP }, + { "bundle", cmd_bundle, RUN_SETUP_GENTLY }, + { "cat-file", cmd_cat_file, RUN_SETUP }, + { "check-attr", cmd_check_attr, RUN_SETUP }, + { "check-ref-format", cmd_check_ref_format }, + { "checkout", cmd_checkout, RUN_SETUP | NEED_WORK_TREE }, + { "checkout-index", cmd_checkout_index, + RUN_SETUP | NEED_WORK_TREE}, + { "cherry", cmd_cherry, RUN_SETUP }, + { "cherry-pick", cmd_cherry_pick, RUN_SETUP | NEED_WORK_TREE }, + { "clean", cmd_clean, RUN_SETUP | NEED_WORK_TREE }, + { "clone", cmd_clone }, + { "column", cmd_column, RUN_SETUP_GENTLY }, + { "commit", cmd_commit, RUN_SETUP | NEED_WORK_TREE }, + { "commit-tree", cmd_commit_tree, RUN_SETUP }, + { "config", cmd_config, RUN_SETUP_GENTLY }, + { "count-objects", cmd_count_objects, RUN_SETUP }, + { "describe", cmd_describe, RUN_SETUP }, + { "diff", cmd_diff }, + { "diff-files", cmd_diff_files, RUN_SETUP | NEED_WORK_TREE }, + { "diff-index", cmd_diff_index, RUN_SETUP }, + { "diff-tree", cmd_diff_tree, RUN_SETUP }, + { "fast-export", cmd_fast_export, RUN_SETUP }, + { "fetch", cmd_fetch, RUN_SETUP }, + { "fetch-pack", cmd_fetch_pack, RUN_SETUP }, + { "fmt-merge-msg", cmd_fmt_merge_msg, RUN_SETUP }, + { "for-each-ref", cmd_for_each_ref, RUN_SETUP }, + { "format-patch", cmd_format_patch, RUN_SETUP }, + { "fsck", cmd_fsck, RUN_SETUP }, + { "fsck-objects", cmd_fsck, 
RUN_SETUP }, + { "gc", cmd_gc, RUN_SETUP }, + { "get-tar-commit-id", cmd_get_tar_commit_id }, + { "grep", cmd_grep, RUN_SETUP_GENTLY }, + { "hash-object", cmd_hash_object }, + { "help", cmd_help }, + { "index-pack", cmd_index_pack, RUN_SETUP_GENTLY }, + { "init", cmd_init_db }, + { "init-db", cmd_init_db }, + { "log", cmd_log, RUN_SETUP }, + { "ls-files", cmd_ls_files, RUN_SETUP }, + { "ls-remote", cmd_ls_remote, RUN_SETUP_GENTLY }, + { "ls-tree", cmd_ls_tree, RUN_SETUP }, + { "mailinfo", cmd_mailinfo }, + { "mailsplit", cmd_mailsplit }, + { "merge", cmd_merge, RUN_SETUP | NEED_WORK_TREE }, + { "merge-base", cmd_merge_base, RUN_SETUP }, + { "merge-file", cmd_merge_file, RUN_SETUP_GENTLY }, + { "merge-index", cmd_merge_index, RUN_SETUP }, + { "merge-ours", cmd_merge_ours, RUN_SETUP }, + { "merge-recursive", cmd_merge_recursive, RUN_SETUP | NEED_WORK_TREE }, + { "merge-recursive-ours", cmd_merge_recursive, RUN_SETUP | NEED_WORK_TREE }, + { "merge-recursive-theirs", cmd_merge_recursive, RUN_SETUP | NEED_WORK_TREE }, + { "merge-subtree", cmd_merge_recursive, RUN_SETUP | NEED_WORK_TREE }, + { "merge-tree", cmd_merge_tree, RUN_SETUP }, + { "mktag", cmd_mktag, RUN_SETUP }, + { "mktree", cmd_mktree, RUN_SETUP }, + { "mv", cmd_mv, RUN_SETUP | NEED_WORK_TREE }, + { "name-rev", cmd_name_rev, RUN_SETUP }, + { "notes", cmd_notes, RUN_SETUP }, + { "pack-objects", cmd_pack_objects, RUN_SETUP }, + { "pack-redundant", cmd_pack_redundant, RUN_SETUP }, + { "pack-refs", cmd_pack_refs, RUN_SETUP }, + { "patch-id", cmd_patch_id }, + { "peek-remote", cmd_ls_remote, RUN_SETUP_GENTLY }, + { "pickaxe", cmd_blame, RUN_SETUP }, + { "prune", cmd_prune, RUN_SETUP }, + { "prune-packed", cmd_prune_packed, RUN_SETUP }, + { "push", cmd_push, RUN_SETUP }, + { "read-tree", cmd_read_tree, RUN_SETUP }, + { "receive-pack", cmd_receive_pack }, + { "reflog", cmd_reflog, RUN_SETUP }, + { "remote", cmd_remote, RUN_SETUP }, + { "remote-ext", cmd_remote_ext }, + { "remote-fd", cmd_remote_fd }, + { "replace", 
cmd_replace, RUN_SETUP }, + { "repo-config", cmd_repo_config, RUN_SETUP_GENTLY }, + { "rerere", cmd_rerere, RUN_SETUP }, + { "reset", cmd_reset, RUN_SETUP }, + { "rev-list", cmd_rev_list, RUN_SETUP }, + { "rev-parse", cmd_rev_parse }, + { "revert", cmd_revert, RUN_SETUP | NEED_WORK_TREE }, + { "rm", cmd_rm, RUN_SETUP }, + { "send-pack", cmd_send_pack, RUN_SETUP }, + { "shortlog", cmd_shortlog, RUN_SETUP_GENTLY | USE_PAGER }, + { "show", cmd_show, RUN_SETUP }, + { "show-branch", cmd_show_branch, RUN_SETUP }, + { "show-ref", cmd_show_ref, RUN_SETUP }, + { "stage", cmd_add, RUN_SETUP | NEED_WORK_TREE }, + { "status", cmd_status, RUN_SETUP | NEED_WORK_TREE }, + { "stripspace", cmd_stripspace }, + { "symbolic-ref", cmd_symbolic_ref, RUN_SETUP }, + { "tag", cmd_tag, RUN_SETUP }, + { "tar-tree", cmd_tar_tree }, + { "unpack-file", cmd_unpack_file, RUN_SETUP }, + { "unpack-objects", cmd_unpack_objects, RUN_SETUP }, + { "update-index", cmd_update_index, RUN_SETUP }, + { "update-ref", cmd_update_ref, RUN_SETUP }, + { "update-server-info", cmd_update_server_info, RUN_SETUP }, + { "upload-archive", cmd_upload_archive }, + { "upload-archive--writer", cmd_upload_archive_writer }, + { "var", cmd_var, RUN_SETUP_GENTLY }, + { "verify-pack", cmd_verify_pack }, + { "verify-tag", cmd_verify_tag, RUN_SETUP }, + { "version", cmd_version }, + { "whatchanged", cmd_whatchanged, RUN_SETUP }, + { "write-tree", cmd_write_tree, RUN_SETUP }, + }; + int i; + static const char ext[] = STRIP_EXTENSION; + + if (sizeof(ext) > 1) { + i = strlen(argv[0]) - strlen(ext); + if (i > 0 && !strcmp(argv[0] + i, ext)) { + char *argv0 = xstrdup(argv[0]); + argv[0] = cmd = argv0; + argv0[i] = '\0'; + } + } + + /* Turn "git cmd --help" into "git help cmd" */ + if (argc > 1 && !strcmp(argv[1], "--help")) { + argv[1] = argv[0]; + argv[0] = cmd = "help"; + } + + for (i = 0; i < ARRAY_SIZE(commands); i++) { + struct cmd_struct *p = commands+i; + if (strcmp(p->cmd, cmd)) + continue; + exit(run_builtin(p, argc, argv)); 
+ } +} + +static void execv_dashed_external(const char **argv) +{ + struct strbuf cmd = STRBUF_INIT; + const char *tmp; + int status; + + if (use_pager == -1) + use_pager = check_pager_config(argv[0]); + commit_pager_choice(); + + strbuf_addf(&cmd, "git-%s", argv[0]); + + /* + * argv[0] must be the git command, but the argv array + * belongs to the caller, and may be reused in + * subsequent loop iterations. Save argv[0] and + * restore it on error. + */ + tmp = argv[0]; + argv[0] = cmd.buf; + + trace_argv_printf(argv, "trace: exec:"); + + /* + * if we fail because the command is not found, it is + * OK to return. Otherwise, we just pass along the status code. + */ + status = run_command_v_opt(argv, RUN_SILENT_EXEC_FAILURE | RUN_CLEAN_ON_EXIT); + if (status >= 0 || errno != ENOENT) + exit(status); + + argv[0] = tmp; + + strbuf_release(&cmd); +} + +static int run_argv(int *argcp, const char ***argv) +{ + int done_alias = 0; + + while (1) { + /* See if it's an internal command */ + handle_internal_command(*argcp, *argv); + + /* .. then try the external ones */ + execv_dashed_external(*argv); + + /* It could be an alias -- this works around the insanity + * of overriding "git log" with "git show" by having + * alias.log = show + */ + if (done_alias || !handle_alias(argcp, argv)) + break; + done_alias = 1; + } + + return done_alias; +} + + +int main(int argc, const char **argv) +{ + const char *cmd; + + startup_info = &git_startup_info; + + cmd = git_extract_argv0_path(argv[0]); + if (!cmd) + cmd = "git-help"; + + git_setup_gettext(); + + /* + * "git-xxxx" is the same as "git xxxx", but we obviously: + * + * - cannot take flags in between the "git" and the "xxxx". + * - cannot execute it externally (since it would just do + * the same thing over again) + * + * So we just directly call the internal command handler, and + * die if that one cannot handle it. 
+ */ + if (!prefixcmp(cmd, "git-")) { + cmd += 4; + argv[0] = cmd; + handle_internal_command(argc, argv); + die("cannot handle %s internally", cmd); + } + + /* Look for flags.. */ + argv++; + argc--; + handle_options(&argv, &argc, NULL); + if (argc > 0) { + if (!prefixcmp(argv[0], "--")) + argv[0] += 2; + } else { + /* The user didn't specify a command; give them help */ + commit_pager_choice(); + printf("usage: %s\n\n", git_usage_string); + list_common_cmds_help(); + printf("\n%s\n", git_more_info_string); + exit(1); + } + cmd = argv[0]; + + /* + * We use PATH to find git commands, but we prepend some higher + * precedence paths: the "--exec-path" option, the GIT_EXEC_PATH + * environment, and the $(gitexecdir) from the Makefile at build + * time. + */ + setup_path(); + + while (1) { + static int done_help = 0; + static int was_alias = 0; + was_alias = run_argv(&argc, &argv); + if (errno != ENOENT) + break; + if (was_alias) { + fprintf(stderr, "Expansion of alias '%s' failed; " + "'%s' is not a git command\n", + cmd, argv[0]); + exit(1); + } + if (!done_help) { + cmd = argv[0] = help_unknown_cmd(cmd); + done_help = 1; + } else + break; + } + + fprintf(stderr, "Failed to run command '%s': %s\n", + cmd, strerror(errno)); + + return 1; +} diff --git a/test/fixtures/c/hash.c b/test/fixtures/c/hash.c new file mode 100644 index 00000000..46075691 --- /dev/null +++ b/test/fixtures/c/hash.c @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2009-2012 the libgit2 contributors + * + * This file is part of libgit2, distributed under the GNU GPL v2 with + * a Linking Exception. For full terms see the included COPYING file. 
+ */ + +#include "common.h" +#include "hash.h" + +#if defined(PPC_SHA1) +# include "ppc/sha1.h" +#else +# include "sha1.h" +#endif + +struct git_hash_ctx { + SHA_CTX c; +}; + +git_hash_ctx *git_hash_new_ctx(void) +{ + git_hash_ctx *ctx = git__malloc(sizeof(*ctx)); + + if (!ctx) + return NULL; + + SHA1_Init(&ctx->c); + + return ctx; +} + +void git_hash_free_ctx(git_hash_ctx *ctx) +{ + git__free(ctx); +} + +void git_hash_init(git_hash_ctx *ctx) +{ + assert(ctx); + SHA1_Init(&ctx->c); +} + +void git_hash_update(git_hash_ctx *ctx, const void *data, size_t len) +{ + assert(ctx); + SHA1_Update(&ctx->c, data, len); +} + +void git_hash_final(git_oid *out, git_hash_ctx *ctx) +{ + assert(ctx); + SHA1_Final(out->id, &ctx->c); +} + +void git_hash_buf(git_oid *out, const void *data, size_t len) +{ + SHA_CTX c; + + SHA1_Init(&c); + SHA1_Update(&c, data, len); + SHA1_Final(out->id, &c); +} + +void git_hash_vec(git_oid *out, git_buf_vec *vec, size_t n) +{ + SHA_CTX c; + size_t i; + + SHA1_Init(&c); + for (i = 0; i < n; i++) + SHA1_Update(&c, vec[i].data, vec[i].len); + SHA1_Final(out->id, &c); +} diff --git a/test/fixtures/c/http_parser.c b/test/fixtures/c/http_parser.c new file mode 100644 index 00000000..0c11eb8d --- /dev/null +++ b/test/fixtures/c/http_parser.c @@ -0,0 +1,2059 @@ +/* Based on src/http/ngx_http_parse.c from NGINX copyright Igor Sysoev + * + * Additional changes are licensed under the same terms as NGINX and + * copyright Joyent, Inc. and other Node contributors. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#include "http_parser.h" +#include +#include +#include +#include +#include +#include + +#ifndef ULLONG_MAX +# define ULLONG_MAX ((uint64_t) -1) /* 2^64-1 */ +#endif + +#ifndef MIN +# define MIN(a,b) ((a) < (b) ? 
(a) : (b)) +#endif + + +#if HTTP_PARSER_DEBUG +#define SET_ERRNO(e) \ +do { \ + parser->http_errno = (e); \ + parser->error_lineno = __LINE__; \ +} while (0) +#else +#define SET_ERRNO(e) \ +do { \ + parser->http_errno = (e); \ +} while(0) +#endif + + +/* Run the notify callback FOR, returning ER if it fails */ +#define CALLBACK_NOTIFY_(FOR, ER) \ +do { \ + assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \ + \ + if (settings->on_##FOR) { \ + if (0 != settings->on_##FOR(parser)) { \ + SET_ERRNO(HPE_CB_##FOR); \ + } \ + \ + /* We either errored above or got paused; get out */ \ + if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \ + return (ER); \ + } \ + } \ +} while (0) + +/* Run the notify callback FOR and consume the current byte */ +#define CALLBACK_NOTIFY(FOR) CALLBACK_NOTIFY_(FOR, p - data + 1) + +/* Run the notify callback FOR and don't consume the current byte */ +#define CALLBACK_NOTIFY_NOADVANCE(FOR) CALLBACK_NOTIFY_(FOR, p - data) + +/* Run data callback FOR with LEN bytes, returning ER if it fails */ +#define CALLBACK_DATA_(FOR, LEN, ER) \ +do { \ + assert(HTTP_PARSER_ERRNO(parser) == HPE_OK); \ + \ + if (FOR##_mark) { \ + if (settings->on_##FOR) { \ + if (0 != settings->on_##FOR(parser, FOR##_mark, (LEN))) { \ + SET_ERRNO(HPE_CB_##FOR); \ + } \ + \ + /* We either errored above or got paused; get out */ \ + if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { \ + return (ER); \ + } \ + } \ + FOR##_mark = NULL; \ + } \ +} while (0) + +/* Run the data callback FOR and consume the current byte */ +#define CALLBACK_DATA(FOR) \ + CALLBACK_DATA_(FOR, p - FOR##_mark, p - data + 1) + +/* Run the data callback FOR and don't consume the current byte */ +#define CALLBACK_DATA_NOADVANCE(FOR) \ + CALLBACK_DATA_(FOR, p - FOR##_mark, p - data) + +/* Set the mark FOR; non-destructive if mark is already set */ +#define MARK(FOR) \ +do { \ + if (!FOR##_mark) { \ + FOR##_mark = p; \ + } \ +} while (0) + + +#define PROXY_CONNECTION "proxy-connection" +#define CONNECTION "connection" +#define 
CONTENT_LENGTH "content-length" +#define TRANSFER_ENCODING "transfer-encoding" +#define UPGRADE "upgrade" +#define CHUNKED "chunked" +#define KEEP_ALIVE "keep-alive" +#define CLOSE "close" + + +static const char *method_strings[] = + { +#define XX(num, name, string) #string, + HTTP_METHOD_MAP(XX) +#undef XX + }; + + +/* Tokens as defined by rfc 2616. Also lowercases them. + * token = 1* + * separators = "(" | ")" | "<" | ">" | "@" + * | "," | ";" | ":" | "\" | <"> + * | "/" | "[" | "]" | "?" | "=" + * | "{" | "}" | SP | HT + */ +static const char tokens[256] = { +/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ + 0, '!', 0, '#', '$', '%', '&', '\'', +/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */ + 0, 0, '*', '+', 0, '-', '.', 0, +/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ + '0', '1', '2', '3', '4', '5', '6', '7', +/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? 
*/ + '8', '9', 0, 0, 0, 0, 0, 0, +/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */ + 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g', +/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */ + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', +/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */ + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', +/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */ + 'x', 'y', 'z', 0, 0, 0, '^', '_', +/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */ + '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', +/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */ + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', +/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', +/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ + 'x', 'y', 'z', 0, '|', 0, '~', 0 }; + + +static const int8_t unhex[256] = + {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + , 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1 + ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1 + ,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 + }; + + +#if HTTP_PARSER_STRICT +# define T 0 +#else +# define T 1 +#endif + + +static const uint8_t normal_url_char[256] = { +/* 0 nul 1 soh 2 stx 3 etx 4 eot 5 enq 6 ack 7 bel */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 8 bs 9 ht 10 nl 11 vt 12 np 13 cr 14 so 15 si */ + 0, T, 0, 0, T, 0, 0, 0, +/* 16 dle 17 dc1 18 dc2 19 dc3 20 dc4 21 nak 22 syn 23 etb */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 24 can 25 em 26 sub 27 esc 28 fs 29 gs 30 rs 31 us */ + 0, 0, 0, 0, 0, 0, 0, 0, +/* 32 sp 33 ! 34 " 35 # 36 $ 37 % 38 & 39 ' */ + 0, 1, 1, 0, 1, 1, 1, 1, +/* 40 ( 41 ) 42 * 43 + 44 , 45 - 46 . 47 / */ + 1, 1, 1, 1, 1, 1, 1, 1, +/* 48 0 49 1 50 2 51 3 52 4 53 5 54 6 55 7 */ + 1, 1, 1, 1, 1, 1, 1, 1, +/* 56 8 57 9 58 : 59 ; 60 < 61 = 62 > 63 ? 
*/ + 1, 1, 1, 1, 1, 1, 1, 0, +/* 64 @ 65 A 66 B 67 C 68 D 69 E 70 F 71 G */ + 1, 1, 1, 1, 1, 1, 1, 1, +/* 72 H 73 I 74 J 75 K 76 L 77 M 78 N 79 O */ + 1, 1, 1, 1, 1, 1, 1, 1, +/* 80 P 81 Q 82 R 83 S 84 T 85 U 86 V 87 W */ + 1, 1, 1, 1, 1, 1, 1, 1, +/* 88 X 89 Y 90 Z 91 [ 92 \ 93 ] 94 ^ 95 _ */ + 1, 1, 1, 1, 1, 1, 1, 1, +/* 96 ` 97 a 98 b 99 c 100 d 101 e 102 f 103 g */ + 1, 1, 1, 1, 1, 1, 1, 1, +/* 104 h 105 i 106 j 107 k 108 l 109 m 110 n 111 o */ + 1, 1, 1, 1, 1, 1, 1, 1, +/* 112 p 113 q 114 r 115 s 116 t 117 u 118 v 119 w */ + 1, 1, 1, 1, 1, 1, 1, 1, +/* 120 x 121 y 122 z 123 { 124 | 125 } 126 ~ 127 del */ + 1, 1, 1, 1, 1, 1, 1, 0, }; + +#undef T + +enum state + { s_dead = 1 /* important that this is > 0 */ + + , s_start_req_or_res + , s_res_or_resp_H + , s_start_res + , s_res_H + , s_res_HT + , s_res_HTT + , s_res_HTTP + , s_res_first_http_major + , s_res_http_major + , s_res_first_http_minor + , s_res_http_minor + , s_res_first_status_code + , s_res_status_code + , s_res_status + , s_res_line_almost_done + + , s_start_req + + , s_req_method + , s_req_spaces_before_url + , s_req_schema + , s_req_schema_slash + , s_req_schema_slash_slash + , s_req_host_start + , s_req_host_v6_start + , s_req_host_v6 + , s_req_host_v6_end + , s_req_host + , s_req_port_start + , s_req_port + , s_req_path + , s_req_query_string_start + , s_req_query_string + , s_req_fragment_start + , s_req_fragment + , s_req_http_start + , s_req_http_H + , s_req_http_HT + , s_req_http_HTT + , s_req_http_HTTP + , s_req_first_http_major + , s_req_http_major + , s_req_first_http_minor + , s_req_http_minor + , s_req_line_almost_done + + , s_header_field_start + , s_header_field + , s_header_value_start + , s_header_value + , s_header_value_lws + + , s_header_almost_done + + , s_chunk_size_start + , s_chunk_size + , s_chunk_parameters + , s_chunk_size_almost_done + + , s_headers_almost_done + , s_headers_done + + /* Important: 's_headers_done' must be the last 'header' state. 
All + * states beyond this must be 'body' states. It is used for overflow + * checking. See the PARSING_HEADER() macro. + */ + + , s_chunk_data + , s_chunk_data_almost_done + , s_chunk_data_done + + , s_body_identity + , s_body_identity_eof + + , s_message_done + }; + + +#define PARSING_HEADER(state) (state <= s_headers_done) + + +enum header_states + { h_general = 0 + , h_C + , h_CO + , h_CON + + , h_matching_connection + , h_matching_proxy_connection + , h_matching_content_length + , h_matching_transfer_encoding + , h_matching_upgrade + + , h_connection + , h_content_length + , h_transfer_encoding + , h_upgrade + + , h_matching_transfer_encoding_chunked + , h_matching_connection_keep_alive + , h_matching_connection_close + + , h_transfer_encoding_chunked + , h_connection_keep_alive + , h_connection_close + }; + + +/* Macros for character classes; depends on strict-mode */ +#define CR '\r' +#define LF '\n' +#define LOWER(c) (unsigned char)(c | 0x20) +#define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z') +#define IS_NUM(c) ((c) >= '0' && (c) <= '9') +#define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c)) +#define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f')) + +#if HTTP_PARSER_STRICT +#define TOKEN(c) (tokens[(unsigned char)c]) +#define IS_URL_CHAR(c) (normal_url_char[(unsigned char) (c)]) +#define IS_HOST_CHAR(c) (IS_ALPHANUM(c) || (c) == '.' || (c) == '-') +#else +#define TOKEN(c) ((c == ' ') ? ' ' : tokens[(unsigned char)c]) +#define IS_URL_CHAR(c) \ + (normal_url_char[(unsigned char) (c)] || ((c) & 0x80)) +#define IS_HOST_CHAR(c) \ + (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_') +#endif + + +#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res) + + +#if HTTP_PARSER_STRICT +# define STRICT_CHECK(cond) \ +do { \ + if (cond) { \ + SET_ERRNO(HPE_STRICT); \ + goto error; \ + } \ +} while (0) +# define NEW_MESSAGE() (http_should_keep_alive(parser) ? 
start_state : s_dead) +#else +# define STRICT_CHECK(cond) +# define NEW_MESSAGE() start_state +#endif + + +/* Map errno values to strings for human-readable output */ +#define HTTP_STRERROR_GEN(n, s) { "HPE_" #n, s }, +static struct { + const char *name; + const char *description; +} http_strerror_tab[] = { + HTTP_ERRNO_MAP(HTTP_STRERROR_GEN) +}; +#undef HTTP_STRERROR_GEN + +int http_message_needs_eof(http_parser *parser); + +/* Our URL parser. + * + * This is designed to be shared by http_parser_execute() for URL validation, + * hence it has a state transition + byte-for-byte interface. In addition, it + * is meant to be embedded in http_parser_parse_url(), which does the dirty + * work of turning state transitions URL components for its API. + * + * This function should only be invoked with non-space characters. It is + * assumed that the caller cares about (and can detect) the transition between + * URL and non-URL states by looking for these. + */ +static enum state +parse_url_char(enum state s, const char ch) +{ + if (ch == ' ' || ch == '\r' || ch == '\n') { + return s_dead; + } + +#if HTTP_PARSER_STRICT + if (ch == '\t' || ch == '\f') { + return s_dead; + } +#endif + + switch (s) { + case s_req_spaces_before_url: + /* Proxied requests are followed by scheme of an absolute URI (alpha). + * All methods except CONNECT are followed by '/' or '*'. 
+ */ + + if (ch == '/' || ch == '*') { + return s_req_path; + } + + if (IS_ALPHA(ch)) { + return s_req_schema; + } + + break; + + case s_req_schema: + if (IS_ALPHA(ch)) { + return s; + } + + if (ch == ':') { + return s_req_schema_slash; + } + + break; + + case s_req_schema_slash: + if (ch == '/') { + return s_req_schema_slash_slash; + } + + break; + + case s_req_schema_slash_slash: + if (ch == '/') { + return s_req_host_start; + } + + break; + + case s_req_host_start: + if (ch == '[') { + return s_req_host_v6_start; + } + + if (IS_HOST_CHAR(ch)) { + return s_req_host; + } + + break; + + case s_req_host: + if (IS_HOST_CHAR(ch)) { + return s_req_host; + } + + /* FALLTHROUGH */ + case s_req_host_v6_end: + switch (ch) { + case ':': + return s_req_port_start; + + case '/': + return s_req_path; + + case '?': + return s_req_query_string_start; + } + + break; + + case s_req_host_v6: + if (ch == ']') { + return s_req_host_v6_end; + } + + /* FALLTHROUGH */ + case s_req_host_v6_start: + if (IS_HEX(ch) || ch == ':') { + return s_req_host_v6; + } + break; + + case s_req_port: + switch (ch) { + case '/': + return s_req_path; + + case '?': + return s_req_query_string_start; + } + + /* FALLTHROUGH */ + case s_req_port_start: + if (IS_NUM(ch)) { + return s_req_port; + } + + break; + + case s_req_path: + if (IS_URL_CHAR(ch)) { + return s; + } + + switch (ch) { + case '?': + return s_req_query_string_start; + + case '#': + return s_req_fragment_start; + } + + break; + + case s_req_query_string_start: + case s_req_query_string: + if (IS_URL_CHAR(ch)) { + return s_req_query_string; + } + + switch (ch) { + case '?': + /* allow extra '?' 
in query string */ + return s_req_query_string; + + case '#': + return s_req_fragment_start; + } + + break; + + case s_req_fragment_start: + if (IS_URL_CHAR(ch)) { + return s_req_fragment; + } + + switch (ch) { + case '?': + return s_req_fragment; + + case '#': + return s; + } + + break; + + case s_req_fragment: + if (IS_URL_CHAR(ch)) { + return s; + } + + switch (ch) { + case '?': + case '#': + return s; + } + + break; + + default: + break; + } + + /* We should never fall out of the switch above unless there's an error */ + return s_dead; +} + +size_t http_parser_execute (http_parser *parser, + const http_parser_settings *settings, + const char *data, + size_t len) +{ + char c, ch; + int8_t unhex_val; + const char *p = data; + const char *header_field_mark = 0; + const char *header_value_mark = 0; + const char *url_mark = 0; + const char *body_mark = 0; + + /* We're in an error state. Don't bother doing anything. */ + if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { + return 0; + } + + if (len == 0) { + switch (parser->state) { + case s_body_identity_eof: + /* Use of CALLBACK_NOTIFY() here would erroneously return 1 byte read if + * we got paused. 
+ */ + CALLBACK_NOTIFY_NOADVANCE(message_complete); + return 0; + + case s_dead: + case s_start_req_or_res: + case s_start_res: + case s_start_req: + return 0; + + default: + SET_ERRNO(HPE_INVALID_EOF_STATE); + return 1; + } + } + + + if (parser->state == s_header_field) + header_field_mark = data; + if (parser->state == s_header_value) + header_value_mark = data; + switch (parser->state) { + case s_req_path: + case s_req_schema: + case s_req_schema_slash: + case s_req_schema_slash_slash: + case s_req_host_start: + case s_req_host_v6_start: + case s_req_host_v6: + case s_req_host_v6_end: + case s_req_host: + case s_req_port_start: + case s_req_port: + case s_req_query_string_start: + case s_req_query_string: + case s_req_fragment_start: + case s_req_fragment: + url_mark = data; + break; + } + + for (p=data; p != data + len; p++) { + ch = *p; + + if (PARSING_HEADER(parser->state)) { + ++parser->nread; + /* Buffer overflow attack */ + if (parser->nread > HTTP_MAX_HEADER_SIZE) { + SET_ERRNO(HPE_HEADER_OVERFLOW); + goto error; + } + } + + reexecute_byte: + switch (parser->state) { + + case s_dead: + /* this state is used after a 'Connection: close' message + * the parser will error out if it reads another message + */ + if (ch == CR || ch == LF) + break; + + SET_ERRNO(HPE_CLOSED_CONNECTION); + goto error; + + case s_start_req_or_res: + { + if (ch == CR || ch == LF) + break; + parser->flags = 0; + parser->content_length = ULLONG_MAX; + + if (ch == 'H') { + parser->state = s_res_or_resp_H; + + CALLBACK_NOTIFY(message_begin); + } else { + parser->type = HTTP_REQUEST; + parser->state = s_start_req; + goto reexecute_byte; + } + + break; + } + + case s_res_or_resp_H: + if (ch == 'T') { + parser->type = HTTP_RESPONSE; + parser->state = s_res_HT; + } else { + if (ch != 'E') { + SET_ERRNO(HPE_INVALID_CONSTANT); + goto error; + } + + parser->type = HTTP_REQUEST; + parser->method = HTTP_HEAD; + parser->index = 2; + parser->state = s_req_method; + } + break; + + case s_start_res: 
+ { + parser->flags = 0; + parser->content_length = ULLONG_MAX; + + switch (ch) { + case 'H': + parser->state = s_res_H; + break; + + case CR: + case LF: + break; + + default: + SET_ERRNO(HPE_INVALID_CONSTANT); + goto error; + } + + CALLBACK_NOTIFY(message_begin); + break; + } + + case s_res_H: + STRICT_CHECK(ch != 'T'); + parser->state = s_res_HT; + break; + + case s_res_HT: + STRICT_CHECK(ch != 'T'); + parser->state = s_res_HTT; + break; + + case s_res_HTT: + STRICT_CHECK(ch != 'P'); + parser->state = s_res_HTTP; + break; + + case s_res_HTTP: + STRICT_CHECK(ch != '/'); + parser->state = s_res_first_http_major; + break; + + case s_res_first_http_major: + if (ch < '0' || ch > '9') { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_major = ch - '0'; + parser->state = s_res_http_major; + break; + + /* major HTTP version or dot */ + case s_res_http_major: + { + if (ch == '.') { + parser->state = s_res_first_http_minor; + break; + } + + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_major *= 10; + parser->http_major += ch - '0'; + + if (parser->http_major > 999) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + break; + } + + /* first digit of minor HTTP version */ + case s_res_first_http_minor: + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_minor = ch - '0'; + parser->state = s_res_http_minor; + break; + + /* minor HTTP version or end of request line */ + case s_res_http_minor: + { + if (ch == ' ') { + parser->state = s_res_first_status_code; + break; + } + + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_minor *= 10; + parser->http_minor += ch - '0'; + + if (parser->http_minor > 999) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + break; + } + + case s_res_first_status_code: + { + if (!IS_NUM(ch)) { + if (ch == ' ') { + break; + } + + SET_ERRNO(HPE_INVALID_STATUS); + goto error; + } + parser->status_code = 
ch - '0'; + parser->state = s_res_status_code; + break; + } + + case s_res_status_code: + { + if (!IS_NUM(ch)) { + switch (ch) { + case ' ': + parser->state = s_res_status; + break; + case CR: + parser->state = s_res_line_almost_done; + break; + case LF: + parser->state = s_header_field_start; + break; + default: + SET_ERRNO(HPE_INVALID_STATUS); + goto error; + } + break; + } + + parser->status_code *= 10; + parser->status_code += ch - '0'; + + if (parser->status_code > 999) { + SET_ERRNO(HPE_INVALID_STATUS); + goto error; + } + + break; + } + + case s_res_status: + /* the human readable status. e.g. "NOT FOUND" + * we are not humans so just ignore this */ + if (ch == CR) { + parser->state = s_res_line_almost_done; + break; + } + + if (ch == LF) { + parser->state = s_header_field_start; + break; + } + break; + + case s_res_line_almost_done: + STRICT_CHECK(ch != LF); + parser->state = s_header_field_start; + break; + + case s_start_req: + { + if (ch == CR || ch == LF) + break; + parser->flags = 0; + parser->content_length = ULLONG_MAX; + + if (!IS_ALPHA(ch)) { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + + parser->method = (enum http_method) 0; + parser->index = 1; + switch (ch) { + case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break; + case 'D': parser->method = HTTP_DELETE; break; + case 'G': parser->method = HTTP_GET; break; + case 'H': parser->method = HTTP_HEAD; break; + case 'L': parser->method = HTTP_LOCK; break; + case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH */ break; + case 'N': parser->method = HTTP_NOTIFY; break; + case 'O': parser->method = HTTP_OPTIONS; break; + case 'P': parser->method = HTTP_POST; + /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */ + break; + case 'R': parser->method = HTTP_REPORT; break; + case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break; + case 'T': parser->method = HTTP_TRACE; break; + case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break; + 
default: + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + parser->state = s_req_method; + + CALLBACK_NOTIFY(message_begin); + + break; + } + + case s_req_method: + { + const char *matcher; + if (ch == '\0') { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + + matcher = method_strings[parser->method]; + if (ch == ' ' && matcher[parser->index] == '\0') { + parser->state = s_req_spaces_before_url; + } else if (ch == matcher[parser->index]) { + ; /* nada */ + } else if (parser->method == HTTP_CONNECT) { + if (parser->index == 1 && ch == 'H') { + parser->method = HTTP_CHECKOUT; + } else if (parser->index == 2 && ch == 'P') { + parser->method = HTTP_COPY; + } else { + goto error; + } + } else if (parser->method == HTTP_MKCOL) { + if (parser->index == 1 && ch == 'O') { + parser->method = HTTP_MOVE; + } else if (parser->index == 1 && ch == 'E') { + parser->method = HTTP_MERGE; + } else if (parser->index == 1 && ch == '-') { + parser->method = HTTP_MSEARCH; + } else if (parser->index == 2 && ch == 'A') { + parser->method = HTTP_MKACTIVITY; + } else { + goto error; + } + } else if (parser->method == HTTP_SUBSCRIBE) { + if (parser->index == 1 && ch == 'E') { + parser->method = HTTP_SEARCH; + } else { + goto error; + } + } else if (parser->index == 1 && parser->method == HTTP_POST) { + if (ch == 'R') { + parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */ + } else if (ch == 'U') { + parser->method = HTTP_PUT; /* or HTTP_PURGE */ + } else if (ch == 'A') { + parser->method = HTTP_PATCH; + } else { + goto error; + } + } else if (parser->index == 2) { + if (parser->method == HTTP_PUT) { + if (ch == 'R') parser->method = HTTP_PURGE; + } else if (parser->method == HTTP_UNLOCK) { + if (ch == 'S') parser->method = HTTP_UNSUBSCRIBE; + } + } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') { + parser->method = HTTP_PROPPATCH; + } else { + SET_ERRNO(HPE_INVALID_METHOD); + goto error; + } + + ++parser->index; + break; + } + + case 
s_req_spaces_before_url: + { + if (ch == ' ') break; + + MARK(url); + if (parser->method == HTTP_CONNECT) { + parser->state = s_req_host_start; + } + + parser->state = parse_url_char((enum state)parser->state, ch); + if (parser->state == s_dead) { + SET_ERRNO(HPE_INVALID_URL); + goto error; + } + + break; + } + + case s_req_schema: + case s_req_schema_slash: + case s_req_schema_slash_slash: + case s_req_host_start: + case s_req_host_v6_start: + case s_req_host_v6: + case s_req_port_start: + { + switch (ch) { + /* No whitespace allowed here */ + case ' ': + case CR: + case LF: + SET_ERRNO(HPE_INVALID_URL); + goto error; + default: + parser->state = parse_url_char((enum state)parser->state, ch); + if (parser->state == s_dead) { + SET_ERRNO(HPE_INVALID_URL); + goto error; + } + } + + break; + } + + case s_req_host: + case s_req_host_v6_end: + case s_req_port: + case s_req_path: + case s_req_query_string_start: + case s_req_query_string: + case s_req_fragment_start: + case s_req_fragment: + { + switch (ch) { + case ' ': + parser->state = s_req_http_start; + CALLBACK_DATA(url); + break; + case CR: + case LF: + parser->http_major = 0; + parser->http_minor = 9; + parser->state = (ch == CR) ? 
+ s_req_line_almost_done : + s_header_field_start; + CALLBACK_DATA(url); + break; + default: + parser->state = parse_url_char((enum state)parser->state, ch); + if (parser->state == s_dead) { + SET_ERRNO(HPE_INVALID_URL); + goto error; + } + } + break; + } + + case s_req_http_start: + switch (ch) { + case 'H': + parser->state = s_req_http_H; + break; + case ' ': + break; + default: + SET_ERRNO(HPE_INVALID_CONSTANT); + goto error; + } + break; + + case s_req_http_H: + STRICT_CHECK(ch != 'T'); + parser->state = s_req_http_HT; + break; + + case s_req_http_HT: + STRICT_CHECK(ch != 'T'); + parser->state = s_req_http_HTT; + break; + + case s_req_http_HTT: + STRICT_CHECK(ch != 'P'); + parser->state = s_req_http_HTTP; + break; + + case s_req_http_HTTP: + STRICT_CHECK(ch != '/'); + parser->state = s_req_first_http_major; + break; + + /* first digit of major HTTP version */ + case s_req_first_http_major: + if (ch < '1' || ch > '9') { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_major = ch - '0'; + parser->state = s_req_http_major; + break; + + /* major HTTP version or dot */ + case s_req_http_major: + { + if (ch == '.') { + parser->state = s_req_first_http_minor; + break; + } + + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_major *= 10; + parser->http_major += ch - '0'; + + if (parser->http_major > 999) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + break; + } + + /* first digit of minor HTTP version */ + case s_req_first_http_minor: + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_minor = ch - '0'; + parser->state = s_req_http_minor; + break; + + /* minor HTTP version or end of request line */ + case s_req_http_minor: + { + if (ch == CR) { + parser->state = s_req_line_almost_done; + break; + } + + if (ch == LF) { + parser->state = s_header_field_start; + break; + } + + /* XXX allow spaces after digit? 
*/ + + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + parser->http_minor *= 10; + parser->http_minor += ch - '0'; + + if (parser->http_minor > 999) { + SET_ERRNO(HPE_INVALID_VERSION); + goto error; + } + + break; + } + + /* end of request line */ + case s_req_line_almost_done: + { + if (ch != LF) { + SET_ERRNO(HPE_LF_EXPECTED); + goto error; + } + + parser->state = s_header_field_start; + break; + } + + case s_header_field_start: + { + if (ch == CR) { + parser->state = s_headers_almost_done; + break; + } + + if (ch == LF) { + /* they might be just sending \n instead of \r\n so this would be + * the second \n to denote the end of headers*/ + parser->state = s_headers_almost_done; + goto reexecute_byte; + } + + c = TOKEN(ch); + + if (!c) { + SET_ERRNO(HPE_INVALID_HEADER_TOKEN); + goto error; + } + + MARK(header_field); + + parser->index = 0; + parser->state = s_header_field; + + switch (c) { + case 'c': + parser->header_state = h_C; + break; + + case 'p': + parser->header_state = h_matching_proxy_connection; + break; + + case 't': + parser->header_state = h_matching_transfer_encoding; + break; + + case 'u': + parser->header_state = h_matching_upgrade; + break; + + default: + parser->header_state = h_general; + break; + } + break; + } + + case s_header_field: + { + c = TOKEN(ch); + + if (c) { + switch (parser->header_state) { + case h_general: + break; + + case h_C: + parser->index++; + parser->header_state = (c == 'o' ? h_CO : h_general); + break; + + case h_CO: + parser->index++; + parser->header_state = (c == 'n' ? 
h_CON : h_general); + break; + + case h_CON: + parser->index++; + switch (c) { + case 'n': + parser->header_state = h_matching_connection; + break; + case 't': + parser->header_state = h_matching_content_length; + break; + default: + parser->header_state = h_general; + break; + } + break; + + /* connection */ + + case h_matching_connection: + parser->index++; + if (parser->index > sizeof(CONNECTION)-1 + || c != CONNECTION[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(CONNECTION)-2) { + parser->header_state = h_connection; + } + break; + + /* proxy-connection */ + + case h_matching_proxy_connection: + parser->index++; + if (parser->index > sizeof(PROXY_CONNECTION)-1 + || c != PROXY_CONNECTION[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(PROXY_CONNECTION)-2) { + parser->header_state = h_connection; + } + break; + + /* content-length */ + + case h_matching_content_length: + parser->index++; + if (parser->index > sizeof(CONTENT_LENGTH)-1 + || c != CONTENT_LENGTH[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(CONTENT_LENGTH)-2) { + parser->header_state = h_content_length; + } + break; + + /* transfer-encoding */ + + case h_matching_transfer_encoding: + parser->index++; + if (parser->index > sizeof(TRANSFER_ENCODING)-1 + || c != TRANSFER_ENCODING[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(TRANSFER_ENCODING)-2) { + parser->header_state = h_transfer_encoding; + } + break; + + /* upgrade */ + + case h_matching_upgrade: + parser->index++; + if (parser->index > sizeof(UPGRADE)-1 + || c != UPGRADE[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(UPGRADE)-2) { + parser->header_state = h_upgrade; + } + break; + + case h_connection: + case h_content_length: + case h_transfer_encoding: + case h_upgrade: + if (ch != ' ') parser->header_state = h_general; + 
break; + + default: + assert(0 && "Unknown header_state"); + break; + } + break; + } + + if (ch == ':') { + parser->state = s_header_value_start; + CALLBACK_DATA(header_field); + break; + } + + if (ch == CR) { + parser->state = s_header_almost_done; + CALLBACK_DATA(header_field); + break; + } + + if (ch == LF) { + parser->state = s_header_field_start; + CALLBACK_DATA(header_field); + break; + } + + SET_ERRNO(HPE_INVALID_HEADER_TOKEN); + goto error; + } + + case s_header_value_start: + { + if (ch == ' ' || ch == '\t') break; + + MARK(header_value); + + parser->state = s_header_value; + parser->index = 0; + + if (ch == CR) { + parser->header_state = h_general; + parser->state = s_header_almost_done; + CALLBACK_DATA(header_value); + break; + } + + if (ch == LF) { + parser->state = s_header_field_start; + CALLBACK_DATA(header_value); + break; + } + + c = LOWER(ch); + + switch (parser->header_state) { + case h_upgrade: + parser->flags |= F_UPGRADE; + parser->header_state = h_general; + break; + + case h_transfer_encoding: + /* looking for 'Transfer-Encoding: chunked' */ + if ('c' == c) { + parser->header_state = h_matching_transfer_encoding_chunked; + } else { + parser->header_state = h_general; + } + break; + + case h_content_length: + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + goto error; + } + + parser->content_length = ch - '0'; + break; + + case h_connection: + /* looking for 'Connection: keep-alive' */ + if (c == 'k') { + parser->header_state = h_matching_connection_keep_alive; + /* looking for 'Connection: close' */ + } else if (c == 'c') { + parser->header_state = h_matching_connection_close; + } else { + parser->header_state = h_general; + } + break; + + default: + parser->header_state = h_general; + break; + } + break; + } + + case s_header_value: + { + + if (ch == CR) { + parser->state = s_header_almost_done; + CALLBACK_DATA(header_value); + break; + } + + if (ch == LF) { + parser->state = s_header_almost_done; + 
CALLBACK_DATA_NOADVANCE(header_value); + goto reexecute_byte; + } + + c = LOWER(ch); + + switch (parser->header_state) { + case h_general: + break; + + case h_connection: + case h_transfer_encoding: + assert(0 && "Shouldn't get here."); + break; + + case h_content_length: + { + uint64_t t; + + if (ch == ' ') break; + + if (!IS_NUM(ch)) { + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + goto error; + } + + t = parser->content_length; + t *= 10; + t += ch - '0'; + + /* Overflow? */ + if (t < parser->content_length || t == ULLONG_MAX) { + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + goto error; + } + + parser->content_length = t; + break; + } + + /* Transfer-Encoding: chunked */ + case h_matching_transfer_encoding_chunked: + parser->index++; + if (parser->index > sizeof(CHUNKED)-1 + || c != CHUNKED[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(CHUNKED)-2) { + parser->header_state = h_transfer_encoding_chunked; + } + break; + + /* looking for 'Connection: keep-alive' */ + case h_matching_connection_keep_alive: + parser->index++; + if (parser->index > sizeof(KEEP_ALIVE)-1 + || c != KEEP_ALIVE[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(KEEP_ALIVE)-2) { + parser->header_state = h_connection_keep_alive; + } + break; + + /* looking for 'Connection: close' */ + case h_matching_connection_close: + parser->index++; + if (parser->index > sizeof(CLOSE)-1 || c != CLOSE[parser->index]) { + parser->header_state = h_general; + } else if (parser->index == sizeof(CLOSE)-2) { + parser->header_state = h_connection_close; + } + break; + + case h_transfer_encoding_chunked: + case h_connection_keep_alive: + case h_connection_close: + if (ch != ' ') parser->header_state = h_general; + break; + + default: + parser->state = s_header_value; + parser->header_state = h_general; + break; + } + break; + } + + case s_header_almost_done: + { + STRICT_CHECK(ch != LF); + + parser->state = s_header_value_lws; + + switch 
(parser->header_state) { + case h_connection_keep_alive: + parser->flags |= F_CONNECTION_KEEP_ALIVE; + break; + case h_connection_close: + parser->flags |= F_CONNECTION_CLOSE; + break; + case h_transfer_encoding_chunked: + parser->flags |= F_CHUNKED; + break; + default: + break; + } + + break; + } + + case s_header_value_lws: + { + if (ch == ' ' || ch == '\t') + parser->state = s_header_value_start; + else + { + parser->state = s_header_field_start; + goto reexecute_byte; + } + break; + } + + case s_headers_almost_done: + { + STRICT_CHECK(ch != LF); + + if (parser->flags & F_TRAILING) { + /* End of a chunked request */ + parser->state = NEW_MESSAGE(); + CALLBACK_NOTIFY(message_complete); + break; + } + + parser->state = s_headers_done; + + /* Set this here so that on_headers_complete() callbacks can see it */ + parser->upgrade = + (parser->flags & F_UPGRADE || parser->method == HTTP_CONNECT); + + /* Here we call the headers_complete callback. This is somewhat + * different than other callbacks because if the user returns 1, we + * will interpret that as saying that this message has no body. This + * is needed for the annoying case of recieving a response to a HEAD + * request. + * + * We'd like to use CALLBACK_NOTIFY_NOADVANCE() here but we cannot, so + * we have to simulate it by handling a change in errno below. + */ + if (settings->on_headers_complete) { + switch (settings->on_headers_complete(parser)) { + case 0: + break; + + case 1: + parser->flags |= F_SKIPBODY; + break; + + default: + SET_ERRNO(HPE_CB_headers_complete); + return p - data; /* Error */ + } + } + + if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { + return p - data; + } + + goto reexecute_byte; + } + + case s_headers_done: + { + STRICT_CHECK(ch != LF); + + parser->nread = 0; + + /* Exit, the rest of the connect is in a different protocol. 
*/ + if (parser->upgrade) { + parser->state = NEW_MESSAGE(); + CALLBACK_NOTIFY(message_complete); + return (p - data) + 1; + } + + if (parser->flags & F_SKIPBODY) { + parser->state = NEW_MESSAGE(); + CALLBACK_NOTIFY(message_complete); + } else if (parser->flags & F_CHUNKED) { + /* chunked encoding - ignore Content-Length header */ + parser->state = s_chunk_size_start; + } else { + if (parser->content_length == 0) { + /* Content-Length header given but zero: Content-Length: 0\r\n */ + parser->state = NEW_MESSAGE(); + CALLBACK_NOTIFY(message_complete); + } else if (parser->content_length != ULLONG_MAX) { + /* Content-Length header given and non-zero */ + parser->state = s_body_identity; + } else { + if (parser->type == HTTP_REQUEST || + !http_message_needs_eof(parser)) { + /* Assume content-length 0 - read the next */ + parser->state = NEW_MESSAGE(); + CALLBACK_NOTIFY(message_complete); + } else { + /* Read body until EOF */ + parser->state = s_body_identity_eof; + } + } + } + + break; + } + + case s_body_identity: + { + uint64_t to_read = MIN(parser->content_length, + (uint64_t) ((data + len) - p)); + + assert(parser->content_length != 0 + && parser->content_length != ULLONG_MAX); + + /* The difference between advancing content_length and p is because + * the latter will automaticaly advance on the next loop iteration. + * Further, if content_length ends up at 0, we want to see the last + * byte again for our message complete callback. + */ + MARK(body); + parser->content_length -= to_read; + p += to_read - 1; + + if (parser->content_length == 0) { + parser->state = s_message_done; + + /* Mimic CALLBACK_DATA_NOADVANCE() but with one extra byte. + * + * The alternative to doing this is to wait for the next byte to + * trigger the data callback, just as in every other case. The + * problem with this is that this makes it difficult for the test + * harness to distinguish between complete-on-EOF and + * complete-on-length. 
It's not clear that this distinction is + * important for applications, but let's keep it for now. + */ + CALLBACK_DATA_(body, p - body_mark + 1, p - data); + goto reexecute_byte; + } + + break; + } + + /* read until EOF */ + case s_body_identity_eof: + MARK(body); + p = data + len - 1; + + break; + + case s_message_done: + parser->state = NEW_MESSAGE(); + CALLBACK_NOTIFY(message_complete); + break; + + case s_chunk_size_start: + { + assert(parser->nread == 1); + assert(parser->flags & F_CHUNKED); + + unhex_val = unhex[(unsigned char)ch]; + if (unhex_val == -1) { + SET_ERRNO(HPE_INVALID_CHUNK_SIZE); + goto error; + } + + parser->content_length = unhex_val; + parser->state = s_chunk_size; + break; + } + + case s_chunk_size: + { + uint64_t t; + + assert(parser->flags & F_CHUNKED); + + if (ch == CR) { + parser->state = s_chunk_size_almost_done; + break; + } + + unhex_val = unhex[(unsigned char)ch]; + + if (unhex_val == -1) { + if (ch == ';' || ch == ' ') { + parser->state = s_chunk_parameters; + break; + } + + SET_ERRNO(HPE_INVALID_CHUNK_SIZE); + goto error; + } + + t = parser->content_length; + t *= 16; + t += unhex_val; + + /* Overflow? */ + if (t < parser->content_length || t == ULLONG_MAX) { + SET_ERRNO(HPE_INVALID_CONTENT_LENGTH); + goto error; + } + + parser->content_length = t; + break; + } + + case s_chunk_parameters: + { + assert(parser->flags & F_CHUNKED); + /* just ignore this shit. 
TODO check for overflow */ + if (ch == CR) { + parser->state = s_chunk_size_almost_done; + break; + } + break; + } + + case s_chunk_size_almost_done: + { + assert(parser->flags & F_CHUNKED); + STRICT_CHECK(ch != LF); + + parser->nread = 0; + + if (parser->content_length == 0) { + parser->flags |= F_TRAILING; + parser->state = s_header_field_start; + } else { + parser->state = s_chunk_data; + } + break; + } + + case s_chunk_data: + { + uint64_t to_read = MIN(parser->content_length, + (uint64_t) ((data + len) - p)); + + assert(parser->flags & F_CHUNKED); + assert(parser->content_length != 0 + && parser->content_length != ULLONG_MAX); + + /* See the explanation in s_body_identity for why the content + * length and data pointers are managed this way. + */ + MARK(body); + parser->content_length -= to_read; + p += to_read - 1; + + if (parser->content_length == 0) { + parser->state = s_chunk_data_almost_done; + } + + break; + } + + case s_chunk_data_almost_done: + assert(parser->flags & F_CHUNKED); + assert(parser->content_length == 0); + STRICT_CHECK(ch != CR); + parser->state = s_chunk_data_done; + CALLBACK_DATA(body); + break; + + case s_chunk_data_done: + assert(parser->flags & F_CHUNKED); + STRICT_CHECK(ch != LF); + parser->nread = 0; + parser->state = s_chunk_size_start; + break; + + default: + assert(0 && "unhandled state"); + SET_ERRNO(HPE_INVALID_INTERNAL_STATE); + goto error; + } + } + + /* Run callbacks for any marks that we have leftover after we ran our of + * bytes. There should be at most one of these set, so it's OK to invoke + * them in series (unset marks will not result in callbacks). + * + * We use the NOADVANCE() variety of callbacks here because 'p' has already + * overflowed 'data' and this allows us to correct for the off-by-one that + * we'd otherwise have (since CALLBACK_DATA() is meant to be run with a 'p' + * value that's in-bounds). + */ + + assert(((header_field_mark ? 1 : 0) + + (header_value_mark ? 1 : 0) + + (url_mark ? 
1 : 0) + + (body_mark ? 1 : 0)) <= 1); + + CALLBACK_DATA_NOADVANCE(header_field); + CALLBACK_DATA_NOADVANCE(header_value); + CALLBACK_DATA_NOADVANCE(url); + CALLBACK_DATA_NOADVANCE(body); + + return len; + +error: + if (HTTP_PARSER_ERRNO(parser) == HPE_OK) { + SET_ERRNO(HPE_UNKNOWN); + } + + return (p - data); +} + + +/* Does the parser need to see an EOF to find the end of the message? */ +int +http_message_needs_eof (http_parser *parser) +{ + if (parser->type == HTTP_REQUEST) { + return 0; + } + + /* See RFC 2616 section 4.4 */ + if (parser->status_code / 100 == 1 || /* 1xx e.g. Continue */ + parser->status_code == 204 || /* No Content */ + parser->status_code == 304 || /* Not Modified */ + parser->flags & F_SKIPBODY) { /* response to a HEAD request */ + return 0; + } + + if ((parser->flags & F_CHUNKED) || parser->content_length != ULLONG_MAX) { + return 0; + } + + return 1; +} + + +int +http_should_keep_alive (http_parser *parser) +{ + if (parser->http_major > 0 && parser->http_minor > 0) { + /* HTTP/1.1 */ + if (parser->flags & F_CONNECTION_CLOSE) { + return 0; + } + } else { + /* HTTP/1.0 or earlier */ + if (!(parser->flags & F_CONNECTION_KEEP_ALIVE)) { + return 0; + } + } + + return !http_message_needs_eof(parser); +} + + +const char * http_method_str (enum http_method m) +{ + return method_strings[m]; +} + + +void +http_parser_init (http_parser *parser, enum http_parser_type t) +{ + void *data = parser->data; /* preserve application data */ + memset(parser, 0, sizeof(*parser)); + parser->data = data; + parser->type = t; + parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? 
s_start_res : s_start_req_or_res)); + parser->http_errno = HPE_OK; +} +/* Returns the name field of http_strerror_tab[err]. The range of err is checked only by assert(), so it is unchecked when asserts are compiled out. */ +const char * +http_errno_name(enum http_errno err) { + assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0]))); + return http_strerror_tab[err].name; +} +/* Returns the description field of http_strerror_tab[err]; same assert-only range check as http_errno_name(). */ +const char * +http_errno_description(enum http_errno err) { + assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0]))); + return http_strerror_tab[err].description; +} +/* Parses the URL in buf[0..buflen): feeds every byte to parse_url_char() and, for each field seen, records its offset and length in u->field_data[] and sets its bit in u->field_set. When is_connect is nonzero only a host and a port may be present. Returns 0 on success and 1 on failure. NOTE(review): off and len are assigned from `p - buf` and a running count; if those members are 16-bit, input longer than 65535 bytes would truncate -- confirm that callers bound buflen. */ +int +http_parser_parse_url(const char *buf, size_t buflen, int is_connect, + struct http_parser_url *u) +{ + enum state s; + const char *p; + enum http_parser_url_fields uf, old_uf; + + u->port = u->field_set = 0; + s = is_connect ? s_req_host_start : s_req_spaces_before_url; + uf = old_uf = UF_MAX; + + for (p = buf; p < buf + buflen; p++) { + s = parse_url_char(s, *p); + + /* Figure out the next field that we're operating on */ + switch (s) { + case s_dead: + return 1; + + /* Skip delimiters */ + case s_req_schema_slash: + case s_req_schema_slash_slash: + case s_req_host_start: + case s_req_host_v6_start: + case s_req_host_v6_end: + case s_req_port_start: + case s_req_query_string_start: + case s_req_fragment_start: + continue; + + case s_req_schema: + uf = UF_SCHEMA; + break; + + case s_req_host: + case s_req_host_v6: + uf = UF_HOST; + break; + + case s_req_port: + uf = UF_PORT; + break; + + case s_req_path: + uf = UF_PATH; + break; + + case s_req_query_string: + uf = UF_QUERY; + break; + + case s_req_fragment: + uf = UF_FRAGMENT; + break; + + default: + assert(!"Unexpected state"); + return 1; + } + + /* Nothing's changed; soldier on */ + if (uf == old_uf) { + u->field_data[uf].len++; + continue; + } + + u->field_data[uf].off = p - buf; + u->field_data[uf].len = 1; + + u->field_set |= (1 << uf); + old_uf = uf; + } + + /* CONNECT requests can only contain "hostname:port" */ + if (is_connect && u->field_set != ((1 << UF_HOST)|(1 << UF_PORT))) { + return 1; + } + + /* Make sure we don't end somewhere unexpected */ + switch (s) { + case
s_req_host_v6_start: + case s_req_host_v6: + case s_req_host_v6_end: + case s_req_host: + case s_req_port_start: + return 1; + default: + break; + } + + if (u->field_set & (1 << UF_PORT)) { + /* Don't bother with endp; we've already validated the string */ + unsigned long v = strtoul(buf + u->field_data[UF_PORT].off, NULL, 10); + + /* Ports have a max value of 2^16 */ + if (v > 0xffff) { + return 1; + } + + u->port = (uint16_t) v; + } + + return 0; +} + +void +http_parser_pause(http_parser *parser, int paused) { + /* Users should only be pausing/unpausing a parser that is not in an error + * state. In non-debug builds, there's not much that we can do about this + * other than ignore it. + */ + if (HTTP_PARSER_ERRNO(parser) == HPE_OK || + HTTP_PARSER_ERRNO(parser) == HPE_PAUSED) { + SET_ERRNO((paused) ? HPE_PAUSED : HPE_OK); + } else { + assert(0 && "Attempting to pause parser in error state"); + } +} diff --git a/test/fixtures/c/http_parser.h b/test/fixtures/c/http_parser.h new file mode 100644 index 00000000..8ed41803 --- /dev/null +++ b/test/fixtures/c/http_parser.h @@ -0,0 +1,318 @@ +/* Copyright Joyent, Inc. and other Node contributors. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +#ifndef http_parser_h +#define http_parser_h +#ifdef __cplusplus +extern "C" { +#endif + +#define HTTP_PARSER_VERSION_MAJOR 1 +#define HTTP_PARSER_VERSION_MINOR 0 + +#include +#if defined(_WIN32) && !defined(__MINGW32__) && (!defined(_MSC_VER) || _MSC_VER<1600) +typedef __int8 int8_t; +typedef unsigned __int8 uint8_t; +typedef __int16 int16_t; +typedef unsigned __int16 uint16_t; +typedef __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; + +typedef unsigned int size_t; +typedef int ssize_t; +#else +#include +#endif + +/* Compile with -DHTTP_PARSER_STRICT=0 to make less checks, but run + * faster + */ +#ifndef HTTP_PARSER_STRICT +# define HTTP_PARSER_STRICT 1 +#endif + +/* Compile with -DHTTP_PARSER_DEBUG=1 to add extra debugging information to + * the error reporting facility. + */ +#ifndef HTTP_PARSER_DEBUG +# define HTTP_PARSER_DEBUG 0 +#endif + + +/* Maximium header size allowed */ +#define HTTP_MAX_HEADER_SIZE (80*1024) + + +typedef struct http_parser http_parser; +typedef struct http_parser_settings http_parser_settings; + + +/* Callbacks should return non-zero to indicate an error. The parser will + * then halt execution. + * + * The one exception is on_headers_complete. In a HTTP_RESPONSE parser + * returning '1' from on_headers_complete will tell the parser that it + * should not expect a body. This is used when receiving a response to a + * HEAD request which may contain 'Content-Length' or 'Transfer-Encoding: + * chunked' headers that indicate the presence of a body. + * + * http_data_cb does not return data chunks. It will be call arbitrarally + * many times for each string. E.G. 
you might get 10 callbacks for "on_path" + * each providing just a few characters more data. + */ +typedef int (*http_data_cb) (http_parser*, const char *at, size_t length); +typedef int (*http_cb) (http_parser*); + + +/* Request Methods */ +#define HTTP_METHOD_MAP(XX) \ + XX(0, DELETE, DELETE) \ + XX(1, GET, GET) \ + XX(2, HEAD, HEAD) \ + XX(3, POST, POST) \ + XX(4, PUT, PUT) \ + /* pathological */ \ + XX(5, CONNECT, CONNECT) \ + XX(6, OPTIONS, OPTIONS) \ + XX(7, TRACE, TRACE) \ + /* webdav */ \ + XX(8, COPY, COPY) \ + XX(9, LOCK, LOCK) \ + XX(10, MKCOL, MKCOL) \ + XX(11, MOVE, MOVE) \ + XX(12, PROPFIND, PROPFIND) \ + XX(13, PROPPATCH, PROPPATCH) \ + XX(14, SEARCH, SEARCH) \ + XX(15, UNLOCK, UNLOCK) \ + /* subversion */ \ + XX(16, REPORT, REPORT) \ + XX(17, MKACTIVITY, MKACTIVITY) \ + XX(18, CHECKOUT, CHECKOUT) \ + XX(19, MERGE, MERGE) \ + /* upnp */ \ + XX(20, MSEARCH, M-SEARCH) \ + XX(21, NOTIFY, NOTIFY) \ + XX(22, SUBSCRIBE, SUBSCRIBE) \ + XX(23, UNSUBSCRIBE, UNSUBSCRIBE) \ + /* RFC-5789 */ \ + XX(24, PATCH, PATCH) \ + XX(25, PURGE, PURGE) \ + +enum http_method + { +#define XX(num, name, string) HTTP_##name = num, + HTTP_METHOD_MAP(XX) +#undef XX + }; + + +enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE, HTTP_BOTH }; + + +/* Flag values for http_parser.flags field */ +enum flags + { F_CHUNKED = 1 << 0 + , F_CONNECTION_KEEP_ALIVE = 1 << 1 + , F_CONNECTION_CLOSE = 1 << 2 + , F_TRAILING = 1 << 3 + , F_UPGRADE = 1 << 4 + , F_SKIPBODY = 1 << 5 + }; + + +/* Map for errno-related constants + * + * The provided argument should be a macro that takes 2 arguments. 
+ */ +#define HTTP_ERRNO_MAP(XX) \ + /* No error */ \ + XX(OK, "success") \ + \ + /* Callback-related errors */ \ + XX(CB_message_begin, "the on_message_begin callback failed") \ + XX(CB_url, "the on_url callback failed") \ + XX(CB_header_field, "the on_header_field callback failed") \ + XX(CB_header_value, "the on_header_value callback failed") \ + XX(CB_headers_complete, "the on_headers_complete callback failed") \ + XX(CB_body, "the on_body callback failed") \ + XX(CB_message_complete, "the on_message_complete callback failed") \ + \ + /* Parsing-related errors */ \ + XX(INVALID_EOF_STATE, "stream ended at an unexpected time") \ + XX(HEADER_OVERFLOW, \ + "too many header bytes seen; overflow detected") \ + XX(CLOSED_CONNECTION, \ + "data received after completed connection: close message") \ + XX(INVALID_VERSION, "invalid HTTP version") \ + XX(INVALID_STATUS, "invalid HTTP status code") \ + XX(INVALID_METHOD, "invalid HTTP method") \ + XX(INVALID_URL, "invalid URL") \ + XX(INVALID_HOST, "invalid host") \ + XX(INVALID_PORT, "invalid port") \ + XX(INVALID_PATH, "invalid path") \ + XX(INVALID_QUERY_STRING, "invalid query string") \ + XX(INVALID_FRAGMENT, "invalid fragment") \ + XX(LF_EXPECTED, "LF character expected") \ + XX(INVALID_HEADER_TOKEN, "invalid character in header") \ + XX(INVALID_CONTENT_LENGTH, \ + "invalid character in content-length header") \ + XX(INVALID_CHUNK_SIZE, \ + "invalid character in chunk size header") \ + XX(INVALID_CONSTANT, "invalid constant string") \ + XX(INVALID_INTERNAL_STATE, "encountered unexpected internal state")\ + XX(STRICT, "strict mode assertion failed") \ + XX(PAUSED, "parser is paused") \ + XX(UNKNOWN, "an unknown error occurred") + + +/* Define HPE_* values for each errno value above */ +#define HTTP_ERRNO_GEN(n, s) HPE_##n, +enum http_errno { + HTTP_ERRNO_MAP(HTTP_ERRNO_GEN) +}; +#undef HTTP_ERRNO_GEN + + +/* Get an http_errno value from an http_parser */ +#define HTTP_PARSER_ERRNO(p) ((enum http_errno) (p)->http_errno) 
+ +/* Get the line number that generated the current error */ +#if HTTP_PARSER_DEBUG +#define HTTP_PARSER_ERRNO_LINE(p) ((p)->error_lineno) +#else +#define HTTP_PARSER_ERRNO_LINE(p) 0 +#endif + +/* Per-connection parser state. Members are grouped by who may touch them: PRIVATE (parser internals), READ-ONLY (results for the application) and PUBLIC (application-owned). */ +struct http_parser { + /** PRIVATE **/ + unsigned char type : 2; /* enum http_parser_type */ + unsigned char flags : 6; /* F_* values from 'flags' enum; semi-public */ + unsigned char state; /* enum state from http_parser.c */ + unsigned char header_state; /* enum header_state from http_parser.c */ + unsigned char index; /* index into current matcher */ + + uint32_t nread; /* # bytes read in various scenarios */ + uint64_t content_length; /* # bytes in body; NOTE(review): http_message_needs_eof() tests this against ULLONG_MAX, which suggests ULLONG_MAX (not 0) marks a missing Content-Length header -- confirm */ + + /** READ-ONLY **/ + unsigned short http_major; + unsigned short http_minor; + unsigned short status_code; /* responses only */ + unsigned char method; /* requests only */ + unsigned char http_errno : 7; + + /* 1 = Upgrade header was present and the parser has exited because of that. + * 0 = No upgrade header present. + * Should be checked when http_parser_execute() returns in addition to + * error checking. + */ + unsigned char upgrade : 1; + +#if HTTP_PARSER_DEBUG + uint32_t error_lineno; +#endif + + /** PUBLIC **/ + void *data; /* A pointer to get hook to the "connection" or "socket" object */ +}; + +/* Callback table handed to http_parser_execute(): http_cb members receive only the parser, http_data_cb members also receive a data pointer and a length. */ +struct http_parser_settings { + http_cb on_message_begin; + http_data_cb on_url; + http_data_cb on_header_field; + http_data_cb on_header_value; + http_cb on_headers_complete; + http_data_cb on_body; + http_cb on_message_complete; +}; + +/* URL components reported by http_parser_parse_url(); each value is used both as an array index and as a bit position, and UF_MAX is the number of components. */ +enum http_parser_url_fields + { UF_SCHEMA = 0 + , UF_HOST = 1 + , UF_PORT = 2 + , UF_PATH = 3 + , UF_QUERY = 4 + , UF_FRAGMENT = 5 + , UF_MAX = 6 + }; + + +/* Result structure for http_parser_parse_url(). + * + * Callers should index into field_data[] with UF_* values iff field_set + * has the relevant (1 << UF_*) bit set. As a courtesy to clients (and + * because we probably have padding left over), we convert any port to + * a uint16_t.
+ */ +struct http_parser_url { + uint16_t field_set; /* Bitmask of (1 << UF_*) values */ + uint16_t port; /* Converted UF_PORT string */ + + struct { + uint16_t off; /* Offset into buffer in which field starts */ + uint16_t len; /* Length of run in buffer */ + } field_data[UF_MAX]; +}; + +/* Reset the parser for messages of the given type; the application's parser->data pointer is preserved across the reset. */ +void http_parser_init(http_parser *parser, enum http_parser_type type); + +/* Parse len bytes starting at data. Returns the number of bytes consumed: len on the normal exit path, or the offset at which parsing stopped on the error path (check HTTP_PARSER_ERRNO()). */ +size_t http_parser_execute(http_parser *parser, + const http_parser_settings *settings, + const char *data, + size_t len); + + +/* If http_should_keep_alive() in the on_headers_complete or + * on_message_complete callback returns 0, then this should be + * the last message on the connection. + * If you are the server, respond with the "Connection: close" header. + * If you are the client, close the connection. + */ +int http_should_keep_alive(http_parser *parser); + +/* Returns a string version of the HTTP method. */ +const char *http_method_str(enum http_method m); + +/* Return a string name of the given error */ +const char *http_errno_name(enum http_errno err); + +/* Return a string description of the given error */ +const char *http_errno_description(enum http_errno err); + +/* Parse a URL; return nonzero on failure */ +int http_parser_parse_url(const char *buf, size_t buflen, + int is_connect, + struct http_parser_url *u); + +/* Pause or un-pause the parser; a nonzero value pauses */ +void http_parser_pause(http_parser *parser, int paused); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/test/fixtures/c/markdown.c b/test/fixtures/c/markdown.c new file mode 100644 index 00000000..260483d6 --- /dev/null +++ b/test/fixtures/c/markdown.c @@ -0,0 +1,2551 @@ +/* markdown.c - generic markdown parser */ + +/* + * Copyright (c) 2009, Natacha Porté + * Copyright (c) 2011, Vicent Marti + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in
all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "markdown.h" +#include "stack.h" +/* standard headers: ctype.h for isalnum/ispunct/tolower, string.h for strchr/strncasecmp */ +#include <assert.h> +#include <string.h> +#include <ctype.h> +#include <stdio.h> + +#if defined(_WIN32) +#define strncasecmp _strnicmp +#endif + +#define REF_TABLE_SIZE 8 + +#define BUFFER_BLOCK 0 +#define BUFFER_SPAN 1 + +#define MKD_LI_END 8 /* internal list flag */ + +#define gperf_case_strncmp(s1, s2, n) strncasecmp(s1, s2, n) +#define GPERF_DOWNCASE 1 +#define GPERF_CASE_STRNCMP 1 +#include "html_blocks.h" + +/*************** + * LOCAL TYPES * + ***************/ + +/* link_ref: reference to a link */ +struct link_ref { + unsigned int id; + + struct buf *link; + struct buf *title; + + struct link_ref *next; +}; + +/* char_trigger: function pointer to render active chars */ +/* returns the number of chars taken care of */ +/* data is the pointer of the beginning of the span */ +/* offset is the number of valid chars before data */ +struct sd_markdown; +typedef size_t +(*char_trigger)(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size); + +static size_t char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size); +static size_t char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size); +static size_t char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size); +static size_t char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size); +static
size_t char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size); +static size_t char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size); +static size_t char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size); +static size_t char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size); +static size_t char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size); +static size_t char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size); +static size_t char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size); + +enum markdown_char_t { + MD_CHAR_NONE = 0, + MD_CHAR_EMPHASIS, + MD_CHAR_CODESPAN, + MD_CHAR_LINEBREAK, + MD_CHAR_LINK, + MD_CHAR_LANGLE, + MD_CHAR_ESCAPE, + MD_CHAR_ENTITITY, + MD_CHAR_AUTOLINK_URL, + MD_CHAR_AUTOLINK_EMAIL, + MD_CHAR_AUTOLINK_WWW, + MD_CHAR_SUPERSCRIPT, +}; + +static char_trigger markdown_char_ptrs[] = { + NULL, + &char_emphasis, + &char_codespan, + &char_linebreak, + &char_link, + &char_langle_tag, + &char_escape, + &char_entity, + &char_autolink_url, + &char_autolink_email, + &char_autolink_www, + &char_superscript, +}; + +/* render • structure containing one particular render */ +struct sd_markdown { + struct sd_callbacks cb; + void *opaque; + + struct link_ref *refs[REF_TABLE_SIZE]; + uint8_t active_char[256]; + struct stack work_bufs[2]; + unsigned int ext_flags; + size_t max_nesting; + int in_link_body; +}; + +/*************************** + * HELPER FUNCTIONS * + ***************************/ + +static inline struct buf * +rndr_newbuf(struct sd_markdown *rndr, int type) +{ + static const size_t buf_size[2] = {256, 64}; + struct buf *work = NULL; + struct stack *pool = &rndr->work_bufs[type]; + + if (pool->size < pool->asize && + 
pool->item[pool->size] != NULL) { + work = pool->item[pool->size++]; + work->size = 0; + } else { + work = bufnew(buf_size[type]); + stack_push(pool, work); + } + + return work; +} +/* Hands the most recently taken work buffer of this type back to the pool by decrementing the pool's size; the buffer itself is not released here. */ +static inline void +rndr_popbuf(struct sd_markdown *rndr, int type) +{ + rndr->work_bufs[type].size--; +} +/* Copies src into ob with backslash escapes resolved: each backslash is dropped and the byte after it is emitted verbatim. A backslash that is the last byte of src is dropped. */ +static void +unscape_text(struct buf *ob, struct buf *src) +{ + size_t i = 0, org; + while (i < src->size) { + org = i; + while (i < src->size && src->data[i] != '\\') + i++; + + if (i > org) + bufput(ob, src->data + org, i - org); + + if (i + 1 >= src->size) + break; + + bufputc(ob, src->data[i + 1]); + i += 2; + } +} +/* Case-insensitive hash of a reference name: every byte goes through tolower() before it is mixed into the hash. */ +static unsigned int +hash_link_ref(const uint8_t *link_ref, size_t length) +{ + size_t i; + unsigned int hash = 0; + + for (i = 0; i < length; ++i) + hash = tolower(link_ref[i]) + (hash << 6) + (hash << 16) - hash; + + return hash; +} +/* Allocates a zeroed link_ref whose id is the hash of name, pushes it on the front of bucket (id % REF_TABLE_SIZE) and returns it; returns NULL when calloc fails. The link and title members stay NULL. */ +static struct link_ref * +add_link_ref( + struct link_ref **references, + const uint8_t *name, size_t name_size) +{ + struct link_ref *ref = calloc(1, sizeof(struct link_ref)); + + if (!ref) + return NULL; + + ref->id = hash_link_ref(name, name_size); + ref->next = references[ref->id % REF_TABLE_SIZE]; + + references[ref->id % REF_TABLE_SIZE] = ref; + return ref; +} +/* Looks up a reference by name and returns it, or NULL when none is found. Entries are matched on the hash value alone, so two names with the same hash cannot be told apart. */ +static struct link_ref * +find_link_ref(struct link_ref **references, uint8_t *name, size_t length) +{ + unsigned int hash = hash_link_ref(name, length); + struct link_ref *ref = NULL; + + ref = references[hash % REF_TABLE_SIZE]; + + while (ref != NULL) { + if (ref->id == hash) + return ref; + + ref = ref->next; + } + + return NULL; +} +/* Releases every link_ref in all REF_TABLE_SIZE buckets together with its link and title buffers; the bucket heads themselves are not reset. */ +static void +free_link_refs(struct link_ref **references) +{ + size_t i; + + for (i = 0; i < REF_TABLE_SIZE; ++i) { + struct link_ref *r = references[i]; + struct link_ref *next; + + while (r) { + next = r->next; + bufrelease(r->link); + bufrelease(r->title); + free(r); + r = next; + } + } +} + +/* + * Check whether a char is a Markdown space.
+ + * Right now we only consider spaces the actual + * space and a newline: tabs and carriage returns + * are filtered out during the preprocessing phase. + * + * If we wanted to actually be UTF-8 compliant, we + * should instead extract an Unicode codepoint from + * this character and check for space properties. + */ +static inline int +_isspace(int c) +{ + return c == ' ' || c == '\n'; +} + +/**************************** + * INLINE PARSING FUNCTIONS * + ****************************/ + +/* is_mail_autolink • looks for the address part of a mail autolink and '>' */ +/* this is less strict than the original markdown e-mail address matching */ +static size_t +is_mail_autolink(uint8_t *data, size_t size) +{ + size_t i = 0, nb = 0; + + /* address is assumed to be: [-@._a-zA-Z0-9]+ with exactly one '@' */ + for (i = 0; i < size; ++i) { + if (isalnum(data[i])) + continue; + + switch (data[i]) { + case '@': + nb++; + + case '-': + case '.': + case '_': + break; + + case '>': + return (nb == 1) ? i + 1 : 0; + + default: + return 0; + } + } + + return 0; +} + +/* tag_length • returns the length of the given tag, or 0 is it's not valid */ +static size_t +tag_length(uint8_t *data, size_t size, enum mkd_autolink *autolink) +{ + size_t i, j; + + /* a valid tag can't be shorter than 3 chars */ + if (size < 3) return 0; + + /* begins with a '<' optionally followed by '/', followed by letter or number */ + if (data[0] != '<') return 0; + i = (data[1] == '/') ? 2 : 1; + + if (!isalnum(data[i])) + return 0; + + /* scheme test */ + *autolink = MKDA_NOT_AUTOLINK; + + /* try to find the beginning of an URI */ + while (i < size && (isalnum(data[i]) || data[i] == '.' 
|| data[i] == '+' || data[i] == '-')) + i++; + + /* i may equal size here when the whole input matched the scheme characters, so bound both look-ahead reads */ + if (i > 1 && i < size && data[i] == '@') { + if ((j = is_mail_autolink(data + i, size - i)) != 0) { + *autolink = MKDA_EMAIL; + return i + j; + } + } + + if (i > 2 && i < size && data[i] == ':') { + *autolink = MKDA_NORMAL; + i++; + } + + /* completing autolink test: no whitespace or ' or " */ + if (i >= size) + *autolink = MKDA_NOT_AUTOLINK; + + else if (*autolink) { + j = i; + + while (i < size) { + if (data[i] == '\\') i += 2; + else if (data[i] == '>' || data[i] == '\'' || + data[i] == '"' || data[i] == ' ' || data[i] == '\n') + break; + else i++; + } + + if (i >= size) return 0; + if (i > j && data[i] == '>') return i + 1; + /* one of the forbidden chars has been found */ + *autolink = MKDA_NOT_AUTOLINK; + } + + /* looking for something looking like a tag end */ + while (i < size && data[i] != '>') i++; + if (i >= size) return 0; + return i + 1; +} + +/* parse_inline • parses inline markdown elements */ +static void +parse_inline(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size) +{ + size_t i = 0, end = 0; + uint8_t action = 0; + struct buf work = { 0, 0, 0, 0 }; + + if (rndr->work_bufs[BUFFER_SPAN].size + + rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting) + return; + + while (i < size) { + /* copying inactive chars into the output */ + while (end < size && (action = rndr->active_char[data[end]]) == 0) { + end++; + } + + if (rndr->cb.normal_text) { + work.data = data + i; + work.size = end - i; + rndr->cb.normal_text(ob, &work, rndr->opaque); + } + else + bufput(ob, data + i, end - i); + + if (end >= size) break; + i = end; + + end = markdown_char_ptrs[(int)action](ob, rndr, data + i, i, size - i); + if (!end) /* no action from the callback */ + end = i + 1; + else { + i += end; + end = i; + } + } +} + +/* find_emph_char • looks for the next emph char, skipping other constructs */ +static size_t +find_emph_char(uint8_t *data, size_t size, uint8_t c) +{ + size_t i = 1; + + while (i < size) { + while (i <
size && data[i] != c && data[i] != '`' && data[i] != '[') + i++; + + if (i == size) + return 0; + + if (data[i] == c) + return i; + + /* not counting escaped chars */ + if (i && data[i - 1] == '\\') { + i++; continue; + } + + if (data[i] == '`') { + size_t span_nb = 0, bt; + size_t tmp_i = 0; + + /* counting the number of opening backticks */ + while (i < size && data[i] == '`') { + i++; span_nb++; + } + + if (i >= size) return 0; + + /* finding the matching closing sequence */ + bt = 0; + while (i < size && bt < span_nb) { + if (!tmp_i && data[i] == c) tmp_i = i; + if (data[i] == '`') bt++; + else bt = 0; + i++; + } + + if (i >= size) return tmp_i; + } + /* skipping a link */ + else if (data[i] == '[') { + size_t tmp_i = 0; + uint8_t cc; + + i++; + while (i < size && data[i] != ']') { + if (!tmp_i && data[i] == c) tmp_i = i; + i++; + } + + i++; + while (i < size && (data[i] == ' ' || data[i] == '\n')) + i++; + + if (i >= size) + return tmp_i; + + switch (data[i]) { + case '[': + cc = ']'; break; + + case '(': + cc = ')'; break; + + default: + if (tmp_i) + return tmp_i; + else + continue; + } + + i++; + while (i < size && data[i] != cc) { + if (!tmp_i && data[i] == c) tmp_i = i; + i++; + } + + if (i >= size) + return tmp_i; + + i++; + } + } + + return 0; +} + +/* parse_emph1 • parsing single emphase */ +/* closed by a symbol not preceded by whitespace and not followed by symbol */ +static size_t +parse_emph1(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c) +{ + size_t i = 0, len; + struct buf *work = 0; + int r; + + if (!rndr->cb.emphasis) return 0; + + /* skipping one symbol if coming from emph3 */ + if (size > 1 && data[0] == c && data[1] == c) i = 1; + + while (i < size) { + len = find_emph_char(data + i, size - i, c); + if (!len) return 0; + i += len; + if (i >= size) return 0; + + if (data[i] == c && !_isspace(data[i - 1])) { + + if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) { + if (!(i + 1 == size || _isspace(data[i + 1]) || 
ispunct(data[i + 1]))) + continue; + } + + work = rndr_newbuf(rndr, BUFFER_SPAN); + parse_inline(work, rndr, data, i); + r = rndr->cb.emphasis(ob, work, rndr->opaque); + rndr_popbuf(rndr, BUFFER_SPAN); + return r ? i + 1 : 0; + } + } + + return 0; +} + +/* parse_emph2 • parsing double emphase, or strikethrough when c is '~'; closed by two consecutive symbols not preceded by whitespace; returns i + 2 (just past the closing pair) when the renderer accepts, 0 otherwise */ +static size_t +parse_emph2(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c) +{ + int (*render_method)(struct buf *ob, const struct buf *text, void *opaque); + size_t i = 0, len; + struct buf *work = 0; + int r; + + render_method = (c == '~') ? rndr->cb.strikethrough : rndr->cb.double_emphasis; + + if (!render_method) + return 0; + + while (i < size) { + len = find_emph_char(data + i, size - i, c); + if (!len) return 0; + i += len; + + if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) { + work = rndr_newbuf(rndr, BUFFER_SPAN); + parse_inline(work, rndr, data, i); + r = render_method(ob, work, rndr->opaque); + rndr_popbuf(rndr, BUFFER_SPAN); + return r ? i + 2 : 0; + } + i++; + } + return 0; +} + +/* parse_emph3 • parsing triple emphase */ +/* finds the first closing tag, and delegates to the other emph */ +static size_t +parse_emph3(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, uint8_t c) +{ + size_t i = 0, len; + int r; + + while (i < size) { + len = find_emph_char(data + i, size - i, c); + if (!len) return 0; + i += len; + + /* skip whitespace preceded symbols */ + if (data[i] != c || _isspace(data[i - 1])) + continue; + + if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && rndr->cb.triple_emphasis) { + /* triple symbol found */ + struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN); + + parse_inline(work, rndr, data, i); + r = rndr->cb.triple_emphasis(ob, work, rndr->opaque); + rndr_popbuf(rndr, BUFFER_SPAN); + return r ?
i + 3 : 0; + + } else if (i + 1 < size && data[i + 1] == c) { + /* double symbol found, handing over to emph1 */ + len = parse_emph1(ob, rndr, data - 2, size + 2, c); + if (!len) return 0; + else return len - 2; + + } else { + /* single symbol found, handing over to emph2 */ + len = parse_emph2(ob, rndr, data - 1, size + 1, c); + if (!len) return 0; + else return len - 1; + } + } + return 0; +} + +/* char_emphasis • single and double emphasis parsing */ +static size_t +char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size) +{ + uint8_t c = data[0]; + size_t ret; + + if (size > 2 && data[1] != c) { + /* whitespace cannot follow an opening emphasis; + * strikethrough only takes two characters '~~' */ + if (c == '~' || _isspace(data[1]) || (ret = parse_emph1(ob, rndr, data + 1, size - 1, c)) == 0) + return 0; + + return ret + 1; + } + + if (size > 3 && data[1] == c && data[2] != c) { + if (_isspace(data[2]) || (ret = parse_emph2(ob, rndr, data + 2, size - 2, c)) == 0) + return 0; + + return ret + 2; + } + + if (size > 4 && data[1] == c && data[2] == c && data[3] != c) { + if (c == '~' || _isspace(data[3]) || (ret = parse_emph3(ob, rndr, data + 3, size - 3, c)) == 0) + return 0; + + return ret + 3; + } + + return 0; +} + + +/* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */ +static size_t +char_linebreak(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size) +{ + if (offset < 2 || data[-1] != ' ' || data[-2] != ' ') + return 0; + + /* removing the last space from ob and rendering */ + while (ob->size && ob->data[ob->size - 1] == ' ') + ob->size--; + + return rndr->cb.linebreak(ob, rndr->opaque) ? 
1 : 0; +} + + +/* char_codespan • '`' parsing a code span (assuming codespan != 0) */ +static size_t +char_codespan(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size) +{ + size_t end, nb = 0, i, f_begin, f_end; + + /* counting the number of backticks in the delimiter */ + while (nb < size && data[nb] == '`') + nb++; + + /* finding the next delimiter */ + i = 0; + for (end = nb; end < size && i < nb; end++) { + if (data[end] == '`') i++; + else i = 0; + } + + if (i < nb && end >= size) + return 0; /* no matching delimiter */ + + /* trimming outside whitespaces */ + f_begin = nb; + while (f_begin < end && data[f_begin] == ' ') + f_begin++; + + f_end = end - nb; + while (f_end > nb && data[f_end-1] == ' ') + f_end--; + + /* real code span */ + if (f_begin < f_end) { + struct buf work = { data + f_begin, f_end - f_begin, 0, 0 }; + if (!rndr->cb.codespan(ob, &work, rndr->opaque)) + end = 0; + } else { + if (!rndr->cb.codespan(ob, 0, rndr->opaque)) + end = 0; + } + + return end; +} + + +/* char_escape • '\\' backslash escape */ +static size_t +char_escape(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size) +{ + static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~"; + struct buf work = { 0, 0, 0, 0 }; + + if (size > 1) { + if (strchr(escape_chars, data[1]) == NULL) + return 0; + + if (rndr->cb.normal_text) { + work.data = data + 1; + work.size = 1; + rndr->cb.normal_text(ob, &work, rndr->opaque); + } + else bufputc(ob, data[1]); + } else if (size == 1) { + bufputc(ob, data[0]); + } + + return 2; +} + +/* char_entity • '&' escaped when it doesn't belong to an entity */ +/* valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */ +static size_t +char_entity(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size) +{ + size_t end = 1; + struct buf work = { 0, 0, 0, 0 }; + + if (end < size && data[end] == '#') + end++; + + while (end < size && isalnum(data[end])) 
+ end++; + + if (end < size && data[end] == ';') + end++; /* real entity */ + else + return 0; /* lone '&' */ + + if (rndr->cb.entity) { + work.data = data; + work.size = end; + rndr->cb.entity(ob, &work, rndr->opaque); + } + else bufput(ob, data, end); + + return end; +} + +/* char_langle_tag • '<' when tags or autolinks are allowed */ +static size_t +char_langle_tag(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size) +{ + enum mkd_autolink altype = MKDA_NOT_AUTOLINK; + size_t end = tag_length(data, size, &altype); + struct buf work = { data, end, 0, 0 }; + int ret = 0; + + if (end > 2) { + if (rndr->cb.autolink && altype != MKDA_NOT_AUTOLINK) { + struct buf *u_link = rndr_newbuf(rndr, BUFFER_SPAN); + work.data = data + 1; + work.size = end - 2; + unscape_text(u_link, &work); + ret = rndr->cb.autolink(ob, u_link, altype, rndr->opaque); + rndr_popbuf(rndr, BUFFER_SPAN); + } + else if (rndr->cb.raw_html_tag) + ret = rndr->cb.raw_html_tag(ob, &work, rndr->opaque); + } + + if (!ret) return 0; + else return end; +} + +static size_t +char_autolink_www(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size) +{ + struct buf *link, *link_url, *link_text; + size_t link_len, rewind; + + if (!rndr->cb.link || rndr->in_link_body) + return 0; + + link = rndr_newbuf(rndr, BUFFER_SPAN); + + if ((link_len = sd_autolink__www(&rewind, link, data, offset, size)) > 0) { + link_url = rndr_newbuf(rndr, BUFFER_SPAN); + BUFPUTSL(link_url, "http://"); + bufput(link_url, link->data, link->size); + + ob->size -= rewind; + if (rndr->cb.normal_text) { + link_text = rndr_newbuf(rndr, BUFFER_SPAN); + rndr->cb.normal_text(link_text, link, rndr->opaque); + rndr->cb.link(ob, link_url, NULL, link_text, rndr->opaque); + rndr_popbuf(rndr, BUFFER_SPAN); + } else { + rndr->cb.link(ob, link_url, NULL, link, rndr->opaque); + } + rndr_popbuf(rndr, BUFFER_SPAN); + } + + rndr_popbuf(rndr, BUFFER_SPAN); + return link_len; +} + +static size_t 
+char_autolink_email(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size) +{ + struct buf *link; + size_t link_len, rewind; + + if (!rndr->cb.autolink || rndr->in_link_body) + return 0; + + link = rndr_newbuf(rndr, BUFFER_SPAN); + + if ((link_len = sd_autolink__email(&rewind, link, data, offset, size)) > 0) { + ob->size -= rewind; + rndr->cb.autolink(ob, link, MKDA_EMAIL, rndr->opaque); + } + + rndr_popbuf(rndr, BUFFER_SPAN); + return link_len; +} + +static size_t +char_autolink_url(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size) +{ + struct buf *link; + size_t link_len, rewind; + + if (!rndr->cb.autolink || rndr->in_link_body) + return 0; + + link = rndr_newbuf(rndr, BUFFER_SPAN); + + if ((link_len = sd_autolink__url(&rewind, link, data, offset, size)) > 0) { + ob->size -= rewind; + rndr->cb.autolink(ob, link, MKDA_NORMAL, rndr->opaque); + } + + rndr_popbuf(rndr, BUFFER_SPAN); + return link_len; +} + +/* char_link • '[': parsing a link or an image */ +static size_t +char_link(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size) +{ + int is_img = (offset && data[-1] == '!'), level; + size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0; + struct buf *content = 0; + struct buf *link = 0; + struct buf *title = 0; + struct buf *u_link = 0; + size_t org_work_size = rndr->work_bufs[BUFFER_SPAN].size; + int text_has_nl = 0, ret = 0; + int in_title = 0, qtype = 0; + + /* checking whether the correct renderer exists */ + if ((is_img && !rndr->cb.image) || (!is_img && !rndr->cb.link)) + goto cleanup; + + /* looking for the matching closing bracket */ + for (level = 1; i < size; i++) { + if (data[i] == '\n') + text_has_nl = 1; + + else if (data[i - 1] == '\\') + continue; + + else if (data[i] == '[') + level++; + + else if (data[i] == ']') { + level--; + if (level <= 0) + break; + } + } + + if (i >= size) + goto cleanup; + + txt_e = i; + i++; + + /* skip any 
amount of whitespace or newline */ + /* (this is much more laxist than original markdown syntax) */ + while (i < size && _isspace(data[i])) + i++; + + /* inline style link */ + if (i < size && data[i] == '(') { + /* skipping initial whitespace */ + i++; + + while (i < size && _isspace(data[i])) + i++; + + link_b = i; + + /* looking for link end: ' " ) */ + while (i < size) { + if (data[i] == '\\') i += 2; + else if (data[i] == ')') break; + else if (i >= 1 && _isspace(data[i-1]) && (data[i] == '\'' || data[i] == '"')) break; + else i++; + } + + if (i >= size) goto cleanup; + link_e = i; + + /* looking for title end if present */ + if (data[i] == '\'' || data[i] == '"') { + qtype = data[i]; + in_title = 1; + i++; + title_b = i; + + while (i < size) { + if (data[i] == '\\') i += 2; + else if (data[i] == qtype) {in_title = 0; i++;} + else if ((data[i] == ')') && !in_title) break; + else i++; + } + + if (i >= size) goto cleanup; + + /* skipping whitespaces after title */ + title_e = i - 1; + while (title_e > title_b && _isspace(data[title_e])) + title_e--; + + /* checking for closing quote presence */ + if (data[title_e] != '\'' && data[title_e] != '"') { + title_b = title_e = 0; + link_e = i; + } + } + + /* remove whitespace at the end of the link */ + while (link_e > link_b && _isspace(data[link_e - 1])) + link_e--; + + /* remove optional angle brackets around the link */ + if (data[link_b] == '<') link_b++; + if (data[link_e - 1] == '>') link_e--; + + /* building escaped link and title */ + if (link_e > link_b) { + link = rndr_newbuf(rndr, BUFFER_SPAN); + bufput(link, data + link_b, link_e - link_b); + } + + if (title_e > title_b) { + title = rndr_newbuf(rndr, BUFFER_SPAN); + bufput(title, data + title_b, title_e - title_b); + } + + i++; + } + + /* reference style link */ + else if (i < size && data[i] == '[') { + struct buf id = { 0, 0, 0, 0 }; + struct link_ref *lr; + + /* looking for the id */ + i++; + link_b = i; + while (i < size && data[i] != ']') i++; + if (i 
>= size) goto cleanup; + link_e = i; + + /* finding the link_ref */ + if (link_b == link_e) { + if (text_has_nl) { + struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN); + size_t j; + + for (j = 1; j < txt_e; j++) { + if (data[j] != '\n') + bufputc(b, data[j]); + else if (data[j - 1] != ' ') + bufputc(b, ' '); + } + + id.data = b->data; + id.size = b->size; + } else { + id.data = data + 1; + id.size = txt_e - 1; + } + } else { + id.data = data + link_b; + id.size = link_e - link_b; + } + + lr = find_link_ref(rndr->refs, id.data, id.size); + if (!lr) + goto cleanup; + + /* keeping link and title from link_ref */ + link = lr->link; + title = lr->title; + i++; + } + + /* shortcut reference style link */ + else { + struct buf id = { 0, 0, 0, 0 }; + struct link_ref *lr; + + /* crafting the id */ + if (text_has_nl) { + struct buf *b = rndr_newbuf(rndr, BUFFER_SPAN); + size_t j; + + for (j = 1; j < txt_e; j++) { + if (data[j] != '\n') + bufputc(b, data[j]); + else if (data[j - 1] != ' ') + bufputc(b, ' '); + } + + id.data = b->data; + id.size = b->size; + } else { + id.data = data + 1; + id.size = txt_e - 1; + } + + /* finding the link_ref */ + lr = find_link_ref(rndr->refs, id.data, id.size); + if (!lr) + goto cleanup; + + /* keeping link and title from link_ref */ + link = lr->link; + title = lr->title; + + /* rewinding the whitespace */ + i = txt_e + 1; + } + + /* building content: img alt is escaped, link content is parsed */ + if (txt_e > 1) { + content = rndr_newbuf(rndr, BUFFER_SPAN); + if (is_img) { + bufput(content, data + 1, txt_e - 1); + } else { + /* disable autolinking when parsing inline the + * content of a link */ + rndr->in_link_body = 1; + parse_inline(content, rndr, data + 1, txt_e - 1); + rndr->in_link_body = 0; + } + } + + if (link) { + u_link = rndr_newbuf(rndr, BUFFER_SPAN); + unscape_text(u_link, link); + } + + /* calling the relevant rendering function */ + if (is_img) { + if (ob->size && ob->data[ob->size - 1] == '!') + ob->size -= 1; + + ret = 
rndr->cb.image(ob, u_link, title, content, rndr->opaque); + } else { + ret = rndr->cb.link(ob, u_link, title, content, rndr->opaque); + } + + /* cleanup */ +cleanup: + rndr->work_bufs[BUFFER_SPAN].size = (int)org_work_size; + return ret ? i : 0; +} + +static size_t +char_superscript(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size) +{ + size_t sup_start, sup_len; + struct buf *sup; + + if (!rndr->cb.superscript) + return 0; + + if (size < 2) + return 0; + + if (data[1] == '(') { + sup_start = sup_len = 2; + + while (sup_len < size && data[sup_len] != ')' && data[sup_len - 1] != '\\') + sup_len++; + + if (sup_len == size) + return 0; + } else { + sup_start = sup_len = 1; + + while (sup_len < size && !_isspace(data[sup_len])) + sup_len++; + } + + if (sup_len - sup_start == 0) + return (sup_start == 2) ? 3 : 0; + + sup = rndr_newbuf(rndr, BUFFER_SPAN); + parse_inline(sup, rndr, data + sup_start, sup_len - sup_start); + rndr->cb.superscript(ob, sup, rndr->opaque); + rndr_popbuf(rndr, BUFFER_SPAN); + + return (sup_start == 2) ? 
sup_len + 1 : sup_len; +} + +/********************************* + * BLOCK-LEVEL PARSING FUNCTIONS * + *********************************/ + +/* is_empty • returns the line length when it is empty, 0 otherwise */ +static size_t +is_empty(uint8_t *data, size_t size) +{ + size_t i; + + for (i = 0; i < size && data[i] != '\n'; i++) + if (data[i] != ' ') + return 0; + + return i + 1; +} + +/* is_hrule • returns whether a line is a horizontal rule */ +static int +is_hrule(uint8_t *data, size_t size) +{ + size_t i = 0, n = 0; + uint8_t c; + + /* skipping initial spaces */ + if (size < 3) return 0; + if (data[0] == ' ') { i++; + if (data[1] == ' ') { i++; + if (data[2] == ' ') { i++; } } } + + /* looking at the hrule uint8_t */ + if (i + 2 >= size + || (data[i] != '*' && data[i] != '-' && data[i] != '_')) + return 0; + c = data[i]; + + /* the whole line must be the char or whitespace */ + while (i < size && data[i] != '\n') { + if (data[i] == c) n++; + else if (data[i] != ' ') + return 0; + + i++; + } + + return n >= 3; +} + +/* check if a line begins with a code fence; return the + * width of the code fence */ +static size_t +prefix_codefence(uint8_t *data, size_t size) +{ + size_t i = 0, n = 0; + uint8_t c; + + /* skipping initial spaces */ + if (size < 3) return 0; + if (data[0] == ' ') { i++; + if (data[1] == ' ') { i++; + if (data[2] == ' ') { i++; } } } + + /* looking at the hrule uint8_t */ + if (i + 2 >= size || !(data[i] == '~' || data[i] == '`')) + return 0; + + c = data[i]; + + /* the whole line must be the uint8_t or whitespace */ + while (i < size && data[i] == c) { + n++; i++; + } + + if (n < 3) + return 0; + + return i; +} + +/* check if a line is a code fence; return its size if it is */ +static size_t +is_codefence(uint8_t *data, size_t size, struct buf *syntax) +{ + size_t i = 0, syn_len = 0; + uint8_t *syn_start; + + i = prefix_codefence(data, size); + if (i == 0) + return 0; + + while (i < size && data[i] == ' ') + i++; + + syn_start = data + i; + + if 
(i < size && data[i] == '{') { + i++; syn_start++; + + while (i < size && data[i] != '}' && data[i] != '\n') { + syn_len++; i++; + } + + if (i == size || data[i] != '}') + return 0; + + /* strip all whitespace at the beginning and the end + * of the {} block */ + while (syn_len > 0 && _isspace(syn_start[0])) { + syn_start++; syn_len--; + } + + while (syn_len > 0 && _isspace(syn_start[syn_len - 1])) + syn_len--; + + i++; + } else { + while (i < size && !_isspace(data[i])) { + syn_len++; i++; + } + } + + if (syntax) { + syntax->data = syn_start; + syntax->size = syn_len; + } + + while (i < size && data[i] != '\n') { + if (!_isspace(data[i])) + return 0; + + i++; + } + + return i + 1; +} + +/* is_atxheader • returns whether the line is a hash-prefixed header */ +static int +is_atxheader(struct sd_markdown *rndr, uint8_t *data, size_t size) +{ + if (data[0] != '#') + return 0; + + if (rndr->ext_flags & MKDEXT_SPACE_HEADERS) { + size_t level = 0; + + while (level < size && level < 6 && data[level] == '#') + level++; + + if (level < size && data[level] != ' ') + return 0; + } + + return 1; +} + +/* is_headerline • returns whether the line is a setext-style hdr underline */ +static int +is_headerline(uint8_t *data, size_t size) +{ + size_t i = 0; + + /* test of level 1 header */ + if (data[i] == '=') { + for (i = 1; i < size && data[i] == '='; i++); + while (i < size && data[i] == ' ') i++; + return (i >= size || data[i] == '\n') ? 1 : 0; } + + /* test of level 2 header */ + if (data[i] == '-') { + for (i = 1; i < size && data[i] == '-'; i++); + while (i < size && data[i] == ' ') i++; + return (i >= size || data[i] == '\n') ? 
2 : 0; } + + return 0; +} + +static int +is_next_headerline(uint8_t *data, size_t size) +{ + size_t i = 0; + + while (i < size && data[i] != '\n') + i++; + + if (++i >= size) + return 0; + + return is_headerline(data + i, size - i); +} + +/* prefix_quote • returns blockquote prefix length */ +static size_t +prefix_quote(uint8_t *data, size_t size) +{ + size_t i = 0; + if (i < size && data[i] == ' ') i++; + if (i < size && data[i] == ' ') i++; + if (i < size && data[i] == ' ') i++; + + if (i < size && data[i] == '>') { + if (i + 1 < size && data[i + 1] == ' ') + return i + 2; + + return i + 1; + } + + return 0; +} + +/* prefix_code • returns prefix length for block code*/ +static size_t +prefix_code(uint8_t *data, size_t size) +{ + if (size > 3 && data[0] == ' ' && data[1] == ' ' + && data[2] == ' ' && data[3] == ' ') return 4; + + return 0; +} + +/* prefix_oli • returns ordered list item prefix */ +static size_t +prefix_oli(uint8_t *data, size_t size) +{ + size_t i = 0; + + if (i < size && data[i] == ' ') i++; + if (i < size && data[i] == ' ') i++; + if (i < size && data[i] == ' ') i++; + + if (i >= size || data[i] < '0' || data[i] > '9') + return 0; + + while (i < size && data[i] >= '0' && data[i] <= '9') + i++; + + if (i + 1 >= size || data[i] != '.' 
|| data[i + 1] != ' ') + return 0; + + if (is_next_headerline(data + i, size - i)) + return 0; + + return i + 2; +} + +/* prefix_uli • returns ordered list item prefix */ +static size_t +prefix_uli(uint8_t *data, size_t size) +{ + size_t i = 0; + + if (i < size && data[i] == ' ') i++; + if (i < size && data[i] == ' ') i++; + if (i < size && data[i] == ' ') i++; + + if (i + 1 >= size || + (data[i] != '*' && data[i] != '+' && data[i] != '-') || + data[i + 1] != ' ') + return 0; + + if (is_next_headerline(data + i, size - i)) + return 0; + + return i + 2; +} + + +/* parse_block • parsing of one block, returning next uint8_t to parse */ +static void parse_block(struct buf *ob, struct sd_markdown *rndr, + uint8_t *data, size_t size); + + +/* parse_blockquote • handles parsing of a blockquote fragment */ +static size_t +parse_blockquote(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size) +{ + size_t beg, end = 0, pre, work_size = 0; + uint8_t *work_data = 0; + struct buf *out = 0; + + out = rndr_newbuf(rndr, BUFFER_BLOCK); + beg = 0; + while (beg < size) { + for (end = beg + 1; end < size && data[end - 1] != '\n'; end++); + + pre = prefix_quote(data + beg, end - beg); + + if (pre) + beg += pre; /* skipping prefix */ + + /* empty line followed by non-quote line */ + else if (is_empty(data + beg, end - beg) && + (end >= size || (prefix_quote(data + end, size - end) == 0 && + !is_empty(data + end, size - end)))) + break; + + if (beg < end) { /* copy into the in-place working buffer */ + /* bufput(work, data + beg, end - beg); */ + if (!work_data) + work_data = data + beg; + else if (data + beg != work_data + work_size) + memmove(work_data + work_size, data + beg, end - beg); + work_size += end - beg; + } + beg = end; + } + + parse_block(out, rndr, work_data, work_size); + if (rndr->cb.blockquote) + rndr->cb.blockquote(ob, out, rndr->opaque); + rndr_popbuf(rndr, BUFFER_BLOCK); + return end; +} + +static size_t +parse_htmlblock(struct buf *ob, struct 
sd_markdown *rndr, uint8_t *data, size_t size, int do_render); + +/* parse_blockquote • handles parsing of a regular paragraph */ +static size_t +parse_paragraph(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size) +{ + size_t i = 0, end = 0; + int level = 0; + struct buf work = { data, 0, 0, 0 }; + + while (i < size) { + for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */; + + if (is_empty(data + i, size - i)) + break; + + if ((level = is_headerline(data + i, size - i)) != 0) + break; + + if (is_atxheader(rndr, data + i, size - i) || + is_hrule(data + i, size - i) || + prefix_quote(data + i, size - i)) { + end = i; + break; + } + + /* + * Early termination of a paragraph with the same logic + * as Markdown 1.0.0. If this logic is applied, the + * Markdown 1.0.3 test suite won't pass cleanly + * + * :: If the first character in a new line is not a letter, + * let's check to see if there's some kind of block starting + * here + */ + if ((rndr->ext_flags & MKDEXT_LAX_SPACING) && !isalnum(data[i])) { + if (prefix_oli(data + i, size - i) || + prefix_uli(data + i, size - i)) { + end = i; + break; + } + + /* see if an html block starts here */ + if (data[i] == '<' && rndr->cb.blockhtml && + parse_htmlblock(ob, rndr, data + i, size - i, 0)) { + end = i; + break; + } + + /* see if a code fence starts here */ + if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 && + is_codefence(data + i, size - i, NULL) != 0) { + end = i; + break; + } + } + + i = end; + } + + work.size = i; + while (work.size && data[work.size - 1] == '\n') + work.size--; + + if (!level) { + struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK); + parse_inline(tmp, rndr, work.data, work.size); + if (rndr->cb.paragraph) + rndr->cb.paragraph(ob, tmp, rndr->opaque); + rndr_popbuf(rndr, BUFFER_BLOCK); + } else { + struct buf *header_work; + + if (work.size) { + size_t beg; + i = work.size; + work.size -= 1; + + while (work.size && data[work.size] != '\n') + work.size -= 1; + + 
beg = work.size + 1; + while (work.size && data[work.size - 1] == '\n') + work.size -= 1; + + if (work.size > 0) { + struct buf *tmp = rndr_newbuf(rndr, BUFFER_BLOCK); + parse_inline(tmp, rndr, work.data, work.size); + + if (rndr->cb.paragraph) + rndr->cb.paragraph(ob, tmp, rndr->opaque); + + rndr_popbuf(rndr, BUFFER_BLOCK); + work.data += beg; + work.size = i - beg; + } + else work.size = i; + } + + header_work = rndr_newbuf(rndr, BUFFER_SPAN); + parse_inline(header_work, rndr, work.data, work.size); + + if (rndr->cb.header) + rndr->cb.header(ob, header_work, (int)level, rndr->opaque); + + rndr_popbuf(rndr, BUFFER_SPAN); + } + + return end; +} + +/* parse_fencedcode • handles parsing of a block-level code fragment */ +static size_t +parse_fencedcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size) +{ + size_t beg, end; + struct buf *work = 0; + struct buf lang = { 0, 0, 0, 0 }; + + beg = is_codefence(data, size, &lang); + if (beg == 0) return 0; + + work = rndr_newbuf(rndr, BUFFER_BLOCK); + + while (beg < size) { + size_t fence_end; + struct buf fence_trail = { 0, 0, 0, 0 }; + + fence_end = is_codefence(data + beg, size - beg, &fence_trail); + if (fence_end != 0 && fence_trail.size == 0) { + beg += fence_end; + break; + } + + for (end = beg + 1; end < size && data[end - 1] != '\n'; end++); + + if (beg < end) { + /* verbatim copy to the working buffer, + escaping entities */ + if (is_empty(data + beg, end - beg)) + bufputc(work, '\n'); + else bufput(work, data + beg, end - beg); + } + beg = end; + } + + if (work->size && work->data[work->size - 1] != '\n') + bufputc(work, '\n'); + + if (rndr->cb.blockcode) + rndr->cb.blockcode(ob, work, lang.size ? 
&lang : NULL, rndr->opaque); + + rndr_popbuf(rndr, BUFFER_BLOCK); + return beg; +} + +static size_t +parse_blockcode(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size) +{ + size_t beg, end, pre; + struct buf *work = 0; + + work = rndr_newbuf(rndr, BUFFER_BLOCK); + + beg = 0; + while (beg < size) { + for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {}; + pre = prefix_code(data + beg, end - beg); + + if (pre) + beg += pre; /* skipping prefix */ + else if (!is_empty(data + beg, end - beg)) + /* non-empty non-prefixed line breaks the pre */ + break; + + if (beg < end) { + /* verbatim copy to the working buffer, + escaping entities */ + if (is_empty(data + beg, end - beg)) + bufputc(work, '\n'); + else bufput(work, data + beg, end - beg); + } + beg = end; + } + + while (work->size && work->data[work->size - 1] == '\n') + work->size -= 1; + + bufputc(work, '\n'); + + if (rndr->cb.blockcode) + rndr->cb.blockcode(ob, work, NULL, rndr->opaque); + + rndr_popbuf(rndr, BUFFER_BLOCK); + return beg; +} + +/* parse_listitem • parsing of a single list item */ +/* assuming initial prefix is already removed */ +static size_t +parse_listitem(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int *flags) +{ + struct buf *work = 0, *inter = 0; + size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i; + int in_empty = 0, has_inside_empty = 0, in_fence = 0; + + /* keeping track of the first indentation prefix */ + while (orgpre < 3 && orgpre < size && data[orgpre] == ' ') + orgpre++; + + beg = prefix_uli(data, size); + if (!beg) + beg = prefix_oli(data, size); + + if (!beg) + return 0; + + /* skipping to the beginning of the following line */ + end = beg; + while (end < size && data[end - 1] != '\n') + end++; + + /* getting working buffers */ + work = rndr_newbuf(rndr, BUFFER_SPAN); + inter = rndr_newbuf(rndr, BUFFER_SPAN); + + /* putting the first line into the working buffer */ + bufput(work, data + beg, end - beg); + beg = end; + + /* 
process the following lines */ + while (beg < size) { + size_t has_next_uli = 0, has_next_oli = 0; + + end++; + + while (end < size && data[end - 1] != '\n') + end++; + + /* process an empty line */ + if (is_empty(data + beg, end - beg)) { + in_empty = 1; + beg = end; + continue; + } + + /* calculating the indentation */ + i = 0; + while (i < 4 && beg + i < end && data[beg + i] == ' ') + i++; + + pre = i; + + if (rndr->ext_flags & MKDEXT_FENCED_CODE) { + if (is_codefence(data + beg + i, end - beg - i, NULL) != 0) + in_fence = !in_fence; + } + + /* Only check for new list items if we are **not** inside + * a fenced code block */ + if (!in_fence) { + has_next_uli = prefix_uli(data + beg + i, end - beg - i); + has_next_oli = prefix_oli(data + beg + i, end - beg - i); + } + + /* checking for ul/ol switch */ + if (in_empty && ( + ((*flags & MKD_LIST_ORDERED) && has_next_uli) || + (!(*flags & MKD_LIST_ORDERED) && has_next_oli))){ + *flags |= MKD_LI_END; + break; /* the following item must have same list type */ + } + + /* checking for a new item */ + if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) || has_next_oli) { + if (in_empty) + has_inside_empty = 1; + + if (pre == orgpre) /* the following item must have */ + break; /* the same indentation */ + + if (!sublist) + sublist = work->size; + } + /* joining only indented stuff after empty lines; + * note that now we only require 1 space of indentation + * to continue a list */ + else if (in_empty && pre == 0) { + *flags |= MKD_LI_END; + break; + } + else if (in_empty) { + bufputc(work, '\n'); + has_inside_empty = 1; + } + + in_empty = 0; + + /* adding the line without prefix into the working buffer */ + bufput(work, data + beg + i, end - beg - i); + beg = end; + } + + /* render of li contents */ + if (has_inside_empty) + *flags |= MKD_LI_BLOCK; + + if (*flags & MKD_LI_BLOCK) { + /* intermediate render of block li */ + if (sublist && sublist < work->size) { + parse_block(inter, rndr, work->data, sublist); + 
parse_block(inter, rndr, work->data + sublist, work->size - sublist); + } + else + parse_block(inter, rndr, work->data, work->size); + } else { + /* intermediate render of inline li */ + if (sublist && sublist < work->size) { + parse_inline(inter, rndr, work->data, sublist); + parse_block(inter, rndr, work->data + sublist, work->size - sublist); + } + else + parse_inline(inter, rndr, work->data, work->size); + } + + /* render of li itself */ + if (rndr->cb.listitem) + rndr->cb.listitem(ob, inter, *flags, rndr->opaque); + + rndr_popbuf(rndr, BUFFER_SPAN); + rndr_popbuf(rndr, BUFFER_SPAN); + return beg; +} + + +/* parse_list • parsing ordered or unordered list block */ +static size_t +parse_list(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int flags) +{ + struct buf *work = 0; + size_t i = 0, j; + + work = rndr_newbuf(rndr, BUFFER_BLOCK); + + while (i < size) { + j = parse_listitem(work, rndr, data + i, size - i, &flags); + i += j; + + if (!j || (flags & MKD_LI_END)) + break; + } + + if (rndr->cb.list) + rndr->cb.list(ob, work, flags, rndr->opaque); + rndr_popbuf(rndr, BUFFER_BLOCK); + return i; +} + +/* parse_atxheader • parsing of atx-style headers */ +static size_t +parse_atxheader(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size) +{ + size_t level = 0; + size_t i, end, skip; + + while (level < size && level < 6 && data[level] == '#') + level++; + + for (i = level; i < size && data[i] == ' '; i++); + + for (end = i; end < size && data[end] != '\n'; end++); + skip = end; + + while (end && data[end - 1] == '#') + end--; + + while (end && data[end - 1] == ' ') + end--; + + if (end > i) { + struct buf *work = rndr_newbuf(rndr, BUFFER_SPAN); + + parse_inline(work, rndr, data + i, end - i); + + if (rndr->cb.header) + rndr->cb.header(ob, work, (int)level, rndr->opaque); + + rndr_popbuf(rndr, BUFFER_SPAN); + } + + return skip; +} + + +/* htmlblock_end • checking end of HTML block : [ \t]*\n[ \t*]\n */ +/* returns the length on 
match, 0 otherwise */ +static size_t +htmlblock_end_tag( + const char *tag, + size_t tag_len, + struct sd_markdown *rndr, + uint8_t *data, + size_t size) +{ + size_t i, w; + + /* checking if tag is a match */ + if (tag_len + 3 >= size || + strncasecmp((char *)data + 2, tag, tag_len) != 0 || + data[tag_len + 2] != '>') + return 0; + + /* checking white lines */ + i = tag_len + 3; + w = 0; + if (i < size && (w = is_empty(data + i, size - i)) == 0) + return 0; /* non-blank after tag */ + i += w; + w = 0; + + if (i < size) + w = is_empty(data + i, size - i); + + return i + w; +} + +static size_t +htmlblock_end(const char *curtag, + struct sd_markdown *rndr, + uint8_t *data, + size_t size, + int start_of_line) +{ + size_t tag_size = strlen(curtag); + size_t i = 1, end_tag; + int block_lines = 0; + + while (i < size) { + i++; + while (i < size && !(data[i - 1] == '<' && data[i] == '/')) { + if (data[i] == '\n') + block_lines++; + + i++; + } + + /* If we are only looking for unindented tags, skip the tag + * if it doesn't follow a newline. 
+ * + * The only exception to this is if the tag is still on the + * initial line; in that case it still counts as a closing + * tag + */ + if (start_of_line && block_lines > 0 && data[i - 2] != '\n') + continue; + + if (i + 2 + tag_size >= size) + break; + + end_tag = htmlblock_end_tag(curtag, tag_size, rndr, data + i - 1, size - i + 1); + if (end_tag) + return i + end_tag - 1; + } + + return 0; +} + + +/* parse_htmlblock • parsing of inline HTML block */ +static size_t +parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render) +{ + size_t i, j = 0, tag_end; + const char *curtag = NULL; + struct buf work = { data, 0, 0, 0 }; + + /* identification of the opening tag */ + if (size < 2 || data[0] != '<') + return 0; + + i = 1; + while (i < size && data[i] != '>' && data[i] != ' ') + i++; + + if (i < size) + curtag = find_block_tag((char *)data + 1, (int)i - 1); + + /* handling of special cases */ + if (!curtag) { + + /* HTML comment, laxist form */ + if (size > 5 && data[1] == '!' 
&& data[2] == '-' && data[3] == '-') { + i = 5; + + while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>')) + i++; + + i++; + + if (i < size) + j = is_empty(data + i, size - i); + + if (j) { + work.size = i + j; + if (do_render && rndr->cb.blockhtml) + rndr->cb.blockhtml(ob, &work, rndr->opaque); + return work.size; + } + } + + /* HR, which is the only self-closing block tag considered */ + if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) { + i = 3; + while (i < size && data[i] != '>') + i++; + + if (i + 1 < size) { + i++; + j = is_empty(data + i, size - i); + if (j) { + work.size = i + j; + if (do_render && rndr->cb.blockhtml) + rndr->cb.blockhtml(ob, &work, rndr->opaque); + return work.size; + } + } + } + + /* no special case recognised */ + return 0; + } + + /* looking for an unindented matching closing tag */ + /* followed by a blank line */ + tag_end = htmlblock_end(curtag, rndr, data, size, 1); + + /* if not found, trying a second pass looking for indented match */ + /* but not if tag is "ins" or "del" (following original Markdown.pl) */ + if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) { + tag_end = htmlblock_end(curtag, rndr, data, size, 0); + } + + if (!tag_end) + return 0; + + /* the end of the block has been found */ + work.size = tag_end; + if (do_render && rndr->cb.blockhtml) + rndr->cb.blockhtml(ob, &work, rndr->opaque); + + return tag_end; +} + +static void +parse_table_row( + struct buf *ob, + struct sd_markdown *rndr, + uint8_t *data, + size_t size, + size_t columns, + int *col_data, + int header_flag) +{ + size_t i = 0, col; + struct buf *row_work = 0; + + if (!rndr->cb.table_cell || !rndr->cb.table_row) + return; + + row_work = rndr_newbuf(rndr, BUFFER_SPAN); + + if (i < size && data[i] == '|') + i++; + + for (col = 0; col < columns && i < size; ++col) { + size_t cell_start, cell_end; + struct buf *cell_work; + + cell_work = rndr_newbuf(rndr, 
BUFFER_SPAN); + + while (i < size && _isspace(data[i])) + i++; + + cell_start = i; + + while (i < size && data[i] != '|') + i++; + + cell_end = i - 1; + + while (cell_end > cell_start && _isspace(data[cell_end])) + cell_end--; + + parse_inline(cell_work, rndr, data + cell_start, 1 + cell_end - cell_start); + rndr->cb.table_cell(row_work, cell_work, col_data[col] | header_flag, rndr->opaque); + + rndr_popbuf(rndr, BUFFER_SPAN); + i++; + } + + for (; col < columns; ++col) { + struct buf empty_cell = { 0, 0, 0, 0 }; + rndr->cb.table_cell(row_work, &empty_cell, col_data[col] | header_flag, rndr->opaque); + } + + rndr->cb.table_row(ob, row_work, rndr->opaque); + + rndr_popbuf(rndr, BUFFER_SPAN); +} + +static size_t +parse_table_header( + struct buf *ob, + struct sd_markdown *rndr, + uint8_t *data, + size_t size, + size_t *columns, + int **column_data) +{ + int pipes; + size_t i = 0, col, header_end, under_end; + + pipes = 0; + while (i < size && data[i] != '\n') + if (data[i++] == '|') + pipes++; + + if (i == size || pipes == 0) + return 0; + + header_end = i; + + while (header_end > 0 && _isspace(data[header_end - 1])) + header_end--; + + if (data[0] == '|') + pipes--; + + if (header_end && data[header_end - 1] == '|') + pipes--; + + *columns = pipes + 1; + *column_data = calloc(*columns, sizeof(int)); + + /* Parse the header underline */ + i++; + if (i < size && data[i] == '|') + i++; + + under_end = i; + while (under_end < size && data[under_end] != '\n') + under_end++; + + for (col = 0; col < *columns && i < under_end; ++col) { + size_t dashes = 0; + + while (i < under_end && data[i] == ' ') + i++; + + if (data[i] == ':') { + i++; (*column_data)[col] |= MKD_TABLE_ALIGN_L; + dashes++; + } + + while (i < under_end && data[i] == '-') { + i++; dashes++; + } + + if (i < under_end && data[i] == ':') { + i++; (*column_data)[col] |= MKD_TABLE_ALIGN_R; + dashes++; + } + + while (i < under_end && data[i] == ' ') + i++; + + if (i < under_end && data[i] != '|') + break; + + if 
(dashes < 3) + break; + + i++; + } + + if (col < *columns) + return 0; + + parse_table_row( + ob, rndr, data, + header_end, + *columns, + *column_data, + MKD_TABLE_HEADER + ); + + return under_end + 1; +} + +static size_t +parse_table( + struct buf *ob, + struct sd_markdown *rndr, + uint8_t *data, + size_t size) +{ + size_t i; + + struct buf *header_work = 0; + struct buf *body_work = 0; + + size_t columns; + int *col_data = NULL; + + header_work = rndr_newbuf(rndr, BUFFER_SPAN); + body_work = rndr_newbuf(rndr, BUFFER_BLOCK); + + i = parse_table_header(header_work, rndr, data, size, &columns, &col_data); + if (i > 0) { + + while (i < size) { + size_t row_start; + int pipes = 0; + + row_start = i; + + while (i < size && data[i] != '\n') + if (data[i++] == '|') + pipes++; + + if (pipes == 0 || i == size) { + i = row_start; + break; + } + + parse_table_row( + body_work, + rndr, + data + row_start, + i - row_start, + columns, + col_data, 0 + ); + + i++; + } + + if (rndr->cb.table) + rndr->cb.table(ob, header_work, body_work, rndr->opaque); + } + + free(col_data); + rndr_popbuf(rndr, BUFFER_SPAN); + rndr_popbuf(rndr, BUFFER_BLOCK); + return i; +} + +/* parse_block • parsing of one block, returning next uint8_t to parse */ +static void +parse_block(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size) +{ + size_t beg, end, i; + uint8_t *txt_data; + beg = 0; + + if (rndr->work_bufs[BUFFER_SPAN].size + + rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting) + return; + + while (beg < size) { + txt_data = data + beg; + end = size - beg; + + if (is_atxheader(rndr, txt_data, end)) + beg += parse_atxheader(ob, rndr, txt_data, end); + + else if (data[beg] == '<' && rndr->cb.blockhtml && + (i = parse_htmlblock(ob, rndr, txt_data, end, 1)) != 0) + beg += i; + + else if ((i = is_empty(txt_data, end)) != 0) + beg += i; + + else if (is_hrule(txt_data, end)) { + if (rndr->cb.hrule) + rndr->cb.hrule(ob, rndr->opaque); + + while (beg < size && data[beg] != '\n') + 
beg++; + + beg++; + } + + else if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 && + (i = parse_fencedcode(ob, rndr, txt_data, end)) != 0) + beg += i; + + else if ((rndr->ext_flags & MKDEXT_TABLES) != 0 && + (i = parse_table(ob, rndr, txt_data, end)) != 0) + beg += i; + + else if (prefix_quote(txt_data, end)) + beg += parse_blockquote(ob, rndr, txt_data, end); + + else if (prefix_code(txt_data, end)) + beg += parse_blockcode(ob, rndr, txt_data, end); + + else if (prefix_uli(txt_data, end)) + beg += parse_list(ob, rndr, txt_data, end, 0); + + else if (prefix_oli(txt_data, end)) + beg += parse_list(ob, rndr, txt_data, end, MKD_LIST_ORDERED); + + else + beg += parse_paragraph(ob, rndr, txt_data, end); + } +} + + + +/********************* + * REFERENCE PARSING * + *********************/ + +/* is_ref • returns whether a line is a reference or not */ +static int +is_ref(const uint8_t *data, size_t beg, size_t end, size_t *last, struct link_ref **refs) +{ +/* int n; */ + size_t i = 0; + size_t id_offset, id_end; + size_t link_offset, link_end; + size_t title_offset, title_end; + size_t line_end; + + /* up to 3 optional leading spaces */ + if (beg + 3 >= end) return 0; + if (data[beg] == ' ') { i = 1; + if (data[beg + 1] == ' ') { i = 2; + if (data[beg + 2] == ' ') { i = 3; + if (data[beg + 3] == ' ') return 0; } } } + i += beg; + + /* id part: anything but a newline between brackets */ + if (data[i] != '[') return 0; + i++; + id_offset = i; + while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']') + i++; + if (i >= end || data[i] != ']') return 0; + id_end = i; + + /* spacer: colon (space | tab)* newline? 
(space | tab)* */ + i++; + if (i >= end || data[i] != ':') return 0; + i++; + while (i < end && data[i] == ' ') i++; + if (i < end && (data[i] == '\n' || data[i] == '\r')) { + i++; + if (i < end && data[i] == '\r' && data[i - 1] == '\n') i++; } + while (i < end && data[i] == ' ') i++; + if (i >= end) return 0; + + /* link: whitespace-free sequence, optionally between angle brackets */ + if (data[i] == '<') + i++; + + link_offset = i; + + while (i < end && data[i] != ' ' && data[i] != '\n' && data[i] != '\r') + i++; + + if (data[i - 1] == '>') link_end = i - 1; + else link_end = i; + + /* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */ + while (i < end && data[i] == ' ') i++; + if (i < end && data[i] != '\n' && data[i] != '\r' + && data[i] != '\'' && data[i] != '"' && data[i] != '(') + return 0; + line_end = 0; + /* computing end-of-line */ + if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i; + if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r') + line_end = i + 1; + + /* optional (space|tab)* spacer after a newline */ + if (line_end) { + i = line_end + 1; + while (i < end && data[i] == ' ') i++; } + + /* optional title: any non-newline sequence enclosed in '"() + alone on its line */ + title_offset = title_end = 0; + if (i + 1 < end + && (data[i] == '\'' || data[i] == '"' || data[i] == '(')) { + i++; + title_offset = i; + /* looking for EOL */ + while (i < end && data[i] != '\n' && data[i] != '\r') i++; + if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r') + title_end = i + 1; + else title_end = i; + /* stepping back */ + i -= 1; + while (i > title_offset && data[i] == ' ') + i -= 1; + if (i > title_offset + && (data[i] == '\'' || data[i] == '"' || data[i] == ')')) { + line_end = title_end; + title_end = i; } } + + if (!line_end || link_end == link_offset) + return 0; /* garbage after the link empty link */ + + /* a valid ref has been found, filling-in return structures */ + if (last) + *last = line_end; + + if (refs) { 
+ struct link_ref *ref; + + ref = add_link_ref(refs, data + id_offset, id_end - id_offset); + if (!ref) + return 0; + + ref->link = bufnew(link_end - link_offset); + bufput(ref->link, data + link_offset, link_end - link_offset); + + if (title_end > title_offset) { + ref->title = bufnew(title_end - title_offset); + bufput(ref->title, data + title_offset, title_end - title_offset); + } + } + + return 1; +} + +static void expand_tabs(struct buf *ob, const uint8_t *line, size_t size) +{ + size_t i = 0, tab = 0; + + while (i < size) { + size_t org = i; + + while (i < size && line[i] != '\t') { + i++; tab++; + } + + if (i > org) + bufput(ob, line + org, i - org); + + if (i >= size) + break; + + do { + bufputc(ob, ' '); tab++; + } while (tab % 4); + + i++; + } +} + +/********************** + * EXPORTED FUNCTIONS * + **********************/ + +struct sd_markdown * +sd_markdown_new( + unsigned int extensions, + size_t max_nesting, + const struct sd_callbacks *callbacks, + void *opaque) +{ + struct sd_markdown *md = NULL; + + assert(max_nesting > 0 && callbacks); + + md = malloc(sizeof(struct sd_markdown)); + if (!md) + return NULL; + + memcpy(&md->cb, callbacks, sizeof(struct sd_callbacks)); + + stack_init(&md->work_bufs[BUFFER_BLOCK], 4); + stack_init(&md->work_bufs[BUFFER_SPAN], 8); + + memset(md->active_char, 0x0, 256); + + if (md->cb.emphasis || md->cb.double_emphasis || md->cb.triple_emphasis) { + md->active_char['*'] = MD_CHAR_EMPHASIS; + md->active_char['_'] = MD_CHAR_EMPHASIS; + if (extensions & MKDEXT_STRIKETHROUGH) + md->active_char['~'] = MD_CHAR_EMPHASIS; + } + + if (md->cb.codespan) + md->active_char['`'] = MD_CHAR_CODESPAN; + + if (md->cb.linebreak) + md->active_char['\n'] = MD_CHAR_LINEBREAK; + + if (md->cb.image || md->cb.link) + md->active_char['['] = MD_CHAR_LINK; + + md->active_char['<'] = MD_CHAR_LANGLE; + md->active_char['\\'] = MD_CHAR_ESCAPE; + md->active_char['&'] = MD_CHAR_ENTITITY; + + if (extensions & MKDEXT_AUTOLINK) { + md->active_char[':'] = 
MD_CHAR_AUTOLINK_URL; + md->active_char['@'] = MD_CHAR_AUTOLINK_EMAIL; + md->active_char['w'] = MD_CHAR_AUTOLINK_WWW; + } + + if (extensions & MKDEXT_SUPERSCRIPT) + md->active_char['^'] = MD_CHAR_SUPERSCRIPT; + + /* Extension data */ + md->ext_flags = extensions; + md->opaque = opaque; + md->max_nesting = max_nesting; + md->in_link_body = 0; + + return md; +} + +void +sd_markdown_render(struct buf *ob, const uint8_t *document, size_t doc_size, struct sd_markdown *md) +{ +#define MARKDOWN_GROW(x) ((x) + ((x) >> 1)) + static const char UTF8_BOM[] = {0xEF, 0xBB, 0xBF}; + + struct buf *text; + size_t beg, end; + + text = bufnew(64); + if (!text) + return; + + /* Preallocate enough space for our buffer to avoid expanding while copying */ + bufgrow(text, doc_size); + + /* reset the references table */ + memset(&md->refs, 0x0, REF_TABLE_SIZE * sizeof(void *)); + + /* first pass: looking for references, copying everything else */ + beg = 0; + + /* Skip a possible UTF-8 BOM, even though the Unicode standard + * discourages having these in UTF-8 documents */ + if (doc_size >= 3 && memcmp(document, UTF8_BOM, 3) == 0) + beg += 3; + + while (beg < doc_size) /* iterating over lines */ + if (is_ref(document, beg, doc_size, &end, md->refs)) + beg = end; + else { /* skipping to the next line */ + end = beg; + while (end < doc_size && document[end] != '\n' && document[end] != '\r') + end++; + + /* adding the line body if present */ + if (end > beg) + expand_tabs(text, document + beg, end - beg); + + while (end < doc_size && (document[end] == '\n' || document[end] == '\r')) { + /* add one \n per newline */ + if (document[end] == '\n' || (end + 1 < doc_size && document[end + 1] != '\n')) + bufputc(text, '\n'); + end++; + } + + beg = end; + } + + /* pre-grow the output buffer to minimize allocations */ + bufgrow(ob, MARKDOWN_GROW(text->size)); + + /* second pass: actual rendering */ + if (md->cb.doc_header) + md->cb.doc_header(ob, md->opaque); + + if (text->size) { + /* adding a final 
newline if not already present */ + if (text->data[text->size - 1] != '\n' && text->data[text->size - 1] != '\r') + bufputc(text, '\n'); + + parse_block(ob, md, text->data, text->size); + } + + if (md->cb.doc_footer) + md->cb.doc_footer(ob, md->opaque); + + /* clean-up */ + bufrelease(text); + free_link_refs(md->refs); + + assert(md->work_bufs[BUFFER_SPAN].size == 0); + assert(md->work_bufs[BUFFER_BLOCK].size == 0); +} + +void +sd_markdown_free(struct sd_markdown *md) +{ + size_t i; + + for (i = 0; i < (size_t)md->work_bufs[BUFFER_SPAN].asize; ++i) + bufrelease(md->work_bufs[BUFFER_SPAN].item[i]); + + for (i = 0; i < (size_t)md->work_bufs[BUFFER_BLOCK].asize; ++i) + bufrelease(md->work_bufs[BUFFER_BLOCK].item[i]); + + stack_free(&md->work_bufs[BUFFER_SPAN]); + stack_free(&md->work_bufs[BUFFER_BLOCK]); + + free(md); +} + +void +sd_version(int *ver_major, int *ver_minor, int *ver_revision) +{ + *ver_major = SUNDOWN_VER_MAJOR; + *ver_minor = SUNDOWN_VER_MINOR; + *ver_revision = SUNDOWN_VER_REVISION; +} + +/* vim: set filetype=c: */ diff --git a/test/fixtures/c/process.c b/test/fixtures/c/process.c new file mode 100644 index 00000000..d3eb998c --- /dev/null +++ b/test/fixtures/c/process.c @@ -0,0 +1,462 @@ +/* Copyright Joyent, Inc. and other Node contributors. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "uv.h" +#include "internal.h" + +#include +#include +#include +#include +#include +#include +#include + +#ifdef __APPLE__ +# include +#endif + +#if defined(__APPLE__) && !TARGET_OS_IPHONE +# include +# define environ (*_NSGetEnviron()) +#else +extern char **environ; +#endif + + +static void uv__chld(EV_P_ ev_child* watcher, int revents) { + int status = watcher->rstatus; + int exit_status = 0; + int term_signal = 0; + uv_process_t *process = watcher->data; + + assert(&process->child_watcher == watcher); + assert(revents & EV_CHILD); + + ev_child_stop(EV_A_ &process->child_watcher); + + if (WIFEXITED(status)) { + exit_status = WEXITSTATUS(status); + } + + if (WIFSIGNALED(status)) { + term_signal = WTERMSIG(status); + } + + if (process->exit_cb) { + process->exit_cb(process, exit_status, term_signal); + } +} + + +int uv__make_socketpair(int fds[2], int flags) { +#ifdef SOCK_NONBLOCK + int fl; + + fl = SOCK_CLOEXEC; + + if (flags & UV__F_NONBLOCK) + fl |= SOCK_NONBLOCK; + + if (socketpair(AF_UNIX, SOCK_STREAM|fl, 0, fds) == 0) + return 0; + + if (errno != EINVAL) + return -1; + + /* errno == EINVAL so maybe the kernel headers lied about + * the availability of SOCK_NONBLOCK. This can happen if people + * build libuv against newer kernel headers than the kernel + * they actually run the software on. 
+ */ +#endif + + if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds)) + return -1; + + uv__cloexec(fds[0], 1); + uv__cloexec(fds[1], 1); + + if (flags & UV__F_NONBLOCK) { + uv__nonblock(fds[0], 1); + uv__nonblock(fds[1], 1); + } + + return 0; +} + + +int uv__make_pipe(int fds[2], int flags) { +#if __linux__ + int fl; + + fl = UV__O_CLOEXEC; + + if (flags & UV__F_NONBLOCK) + fl |= UV__O_NONBLOCK; + + if (uv__pipe2(fds, fl) == 0) + return 0; + + if (errno != ENOSYS) + return -1; +#endif + + if (pipe(fds)) + return -1; + + uv__cloexec(fds[0], 1); + uv__cloexec(fds[1], 1); + + if (flags & UV__F_NONBLOCK) { + uv__nonblock(fds[0], 1); + uv__nonblock(fds[1], 1); + } + + return 0; +} + + +/* + * Used for initializing stdio streams like options.stdin_stream. Returns + * zero on success. + */ +static int uv__process_init_stdio(uv_stdio_container_t* container, int fds[2], + int writable) { + int fd = -1; + switch (container->flags & (UV_IGNORE | UV_CREATE_PIPE | UV_INHERIT_FD | + UV_INHERIT_STREAM)) { + case UV_IGNORE: + return 0; + case UV_CREATE_PIPE: + assert(container->data.stream != NULL); + + if (container->data.stream->type != UV_NAMED_PIPE) { + errno = EINVAL; + return -1; + } + + return uv__make_socketpair(fds, 0); + case UV_INHERIT_FD: + case UV_INHERIT_STREAM: + if (container->flags & UV_INHERIT_FD) { + fd = container->data.fd; + } else { + fd = container->data.stream->fd; + } + + if (fd == -1) { + errno = EINVAL; + return -1; + } + + fds[writable ? 
1 : 0] = fd; + + return 0; + default: + assert(0 && "Unexpected flags"); + return -1; + } +} + + +static int uv__process_stdio_flags(uv_stdio_container_t* container, + int writable) { + if (container->data.stream->type == UV_NAMED_PIPE && + ((uv_pipe_t*)container->data.stream)->ipc) { + return UV_STREAM_READABLE | UV_STREAM_WRITABLE; + } else if (writable) { + return UV_STREAM_WRITABLE; + } else { + return UV_STREAM_READABLE; + } +} + + +static int uv__process_open_stream(uv_stdio_container_t* container, int fds[2], + int writable) { + int fd = fds[writable ? 1 : 0]; + int child_fd = fds[writable ? 0 : 1]; + int flags; + + /* No need to create stream */ + if (!(container->flags & UV_CREATE_PIPE) || fd < 0) { + return 0; + } + + assert(child_fd >= 0); + close(child_fd); + + uv__nonblock(fd, 1); + flags = uv__process_stdio_flags(container, writable); + + return uv__stream_open((uv_stream_t*)container->data.stream, fd, flags); +} + + +static void uv__process_close_stream(uv_stdio_container_t* container) { + if (!(container->flags & UV_CREATE_PIPE)) return; + + uv__stream_close((uv_stream_t*)container->data.stream); +} + + +static void uv__process_child_init(uv_process_options_t options, + int stdio_count, + int* pipes) { + int i; + + if (options.flags & UV_PROCESS_DETACHED) { + setsid(); + } + + /* Dup fds */ + for (i = 0; i < stdio_count; i++) { + /* + * stdin has swapped ends of pipe + * (it's the only one readable stream) + */ + int close_fd = i == 0 ? pipes[i * 2 + 1] : pipes[i * 2]; + int use_fd = i == 0 ? pipes[i * 2] : pipes[i * 2 + 1]; + + if (use_fd >= 0) { + close(close_fd); + } else if (i < 3) { + /* `/dev/null` stdin, stdout, stderr even if they've flag UV_IGNORE */ + use_fd = open("/dev/null", i == 0 ? 
O_RDONLY : O_RDWR); + + if (use_fd < 0) { + perror("failed to open stdio"); + _exit(127); + } + } else { + continue; + } + + if (i != use_fd) { + dup2(use_fd, i); + close(use_fd); + } + } + + if (options.cwd && chdir(options.cwd)) { + perror("chdir()"); + _exit(127); + } + + if ((options.flags & UV_PROCESS_SETGID) && setgid(options.gid)) { + perror("setgid()"); + _exit(127); + } + + if ((options.flags & UV_PROCESS_SETUID) && setuid(options.uid)) { + perror("setuid()"); + _exit(127); + } + + environ = options.env; + + execvp(options.file, options.args); + perror("execvp()"); + _exit(127); +} + + +#ifndef SPAWN_WAIT_EXEC +# define SPAWN_WAIT_EXEC 1 +#endif + +int uv_spawn(uv_loop_t* loop, uv_process_t* process, + uv_process_options_t options) { + /* + * Save environ in the case that we get it clobbered + * by the child process. + */ + char** save_our_env = environ; + + int stdio_count = options.stdio_count < 3 ? 3 : options.stdio_count; + int* pipes = malloc(2 * stdio_count * sizeof(int)); + +#if SPAWN_WAIT_EXEC + int signal_pipe[2] = { -1, -1 }; + struct pollfd pfd; +#endif + int status; + pid_t pid; + int i; + + if (pipes == NULL) { + errno = ENOMEM; + goto error; + } + + assert(options.file != NULL); + assert(!(options.flags & ~(UV_PROCESS_WINDOWS_VERBATIM_ARGUMENTS | + UV_PROCESS_DETACHED | + UV_PROCESS_SETGID | + UV_PROCESS_SETUID))); + + + uv__handle_init(loop, (uv_handle_t*)process, UV_PROCESS); + loop->counters.process_init++; + uv__handle_start(process); + + process->exit_cb = options.exit_cb; + + /* Init pipe pairs */ + for (i = 0; i < stdio_count; i++) { + pipes[i * 2] = -1; + pipes[i * 2 + 1] = -1; + } + + /* Create socketpairs/pipes, or use raw fd */ + for (i = 0; i < options.stdio_count; i++) { + if (uv__process_init_stdio(&options.stdio[i], pipes + i * 2, i != 0)) { + goto error; + } + } + + /* This pipe is used by the parent to wait until + * the child has called `execve()`. 
We need this + * to avoid the following race condition: + * + * if ((pid = fork()) > 0) { + * kill(pid, SIGTERM); + * } + * else if (pid == 0) { + * execve("/bin/cat", argp, envp); + * } + * + * The parent sends a signal immediately after forking. + * Since the child may not have called `execve()` yet, + * there is no telling what process receives the signal, + * our fork or /bin/cat. + * + * To avoid ambiguity, we create a pipe with both ends + * marked close-on-exec. Then, after the call to `fork()`, + * the parent polls the read end until it sees POLLHUP. + */ +#if SPAWN_WAIT_EXEC + if (uv__make_pipe(signal_pipe, UV__F_NONBLOCK)) + goto error; +#endif + + pid = fork(); + + if (pid == -1) { +#if SPAWN_WAIT_EXEC + close(signal_pipe[0]); + close(signal_pipe[1]); +#endif + environ = save_our_env; + goto error; + } + + if (pid == 0) { + /* Child */ + uv__process_child_init(options, stdio_count, pipes); + + /* Execution never reaches here. */ + } + + /* Parent. */ + + /* Restore environment. */ + environ = save_our_env; + +#if SPAWN_WAIT_EXEC + /* POLLHUP signals child has exited or execve()'d. 
*/ + close(signal_pipe[1]); + do { + pfd.fd = signal_pipe[0]; + pfd.events = POLLIN|POLLHUP; + pfd.revents = 0; + errno = 0, status = poll(&pfd, 1, -1); + } + while (status == -1 && (errno == EINTR || errno == ENOMEM)); + + assert((status == 1) && "poll() on pipe read end failed"); + close(signal_pipe[0]); +#endif + + process->pid = pid; + + ev_child_init(&process->child_watcher, uv__chld, pid, 0); + ev_child_start(process->loop->ev, &process->child_watcher); + process->child_watcher.data = process; + + for (i = 0; i < options.stdio_count; i++) { + if (uv__process_open_stream(&options.stdio[i], pipes + i * 2, i == 0)) { + int j; + /* Close all opened streams */ + for (j = 0; j < i; j++) { + uv__process_close_stream(&options.stdio[j]); + } + + goto error; + } + } + + free(pipes); + + return 0; + +error: + uv__set_sys_error(process->loop, errno); + + for (i = 0; i < stdio_count; i++) { + close(pipes[i * 2]); + close(pipes[i * 2 + 1]); + } + + free(pipes); + + return -1; +} + + +int uv_process_kill(uv_process_t* process, int signum) { + int r = kill(process->pid, signum); + + if (r) { + uv__set_sys_error(process->loop, errno); + return -1; + } else { + return 0; + } +} + + +uv_err_t uv_kill(int pid, int signum) { + int r = kill(pid, signum); + + if (r) { + return uv__new_sys_error(errno); + } else { + return uv_ok_; + } +} + + +void uv__process_close(uv_process_t* handle) { + ev_child_stop(handle->loop->ev, &handle->child_watcher); + uv__handle_stop(handle); +} diff --git a/test/fixtures/c/rdiscount.c b/test/fixtures/c/rdiscount.c new file mode 100644 index 00000000..86167ffa --- /dev/null +++ b/test/fixtures/c/rdiscount.c @@ -0,0 +1,129 @@ +#include +#include "ruby.h" +#include "mkdio.h" + +static VALUE rb_cRDiscount; + +static VALUE +rb_rdiscount_to_html(int argc, VALUE *argv, VALUE self) +{ + /* grab char pointer to markdown input text */ + char *res; + int szres; + VALUE encoding; + VALUE text = rb_funcall(self, rb_intern("text"), 0); + VALUE buf = 
rb_str_buf_new(1024); + Check_Type(text, T_STRING); + + int flags = rb_rdiscount__get_flags(self); + + MMIOT *doc = mkd_string(RSTRING_PTR(text), RSTRING_LEN(text), flags); + + if ( mkd_compile(doc, flags) ) { + szres = mkd_document(doc, &res); + + if ( szres != EOF ) { + rb_str_cat(buf, res, szres); + rb_str_cat(buf, "\n", 1); + } + } + mkd_cleanup(doc); + + + /* force the input encoding */ + if ( rb_respond_to(text, rb_intern("encoding")) ) { + encoding = rb_funcall(text, rb_intern("encoding"), 0); + rb_funcall(buf, rb_intern("force_encoding"), 1, encoding); + } + + return buf; +} + +static VALUE +rb_rdiscount_toc_content(int argc, VALUE *argv, VALUE self) +{ + char *res; + int szres; + + int flags = rb_rdiscount__get_flags(self); + + /* grab char pointer to markdown input text */ + VALUE text = rb_funcall(self, rb_intern("text"), 0); + Check_Type(text, T_STRING); + + /* allocate a ruby string buffer and wrap it in a stream */ + VALUE buf = rb_str_buf_new(4096); + + MMIOT *doc = mkd_string(RSTRING_PTR(text), RSTRING_LEN(text), flags); + + if ( mkd_compile(doc, flags) ) { + szres = mkd_toc(doc, &res); + + if ( szres != EOF ) { + rb_str_cat(buf, res, szres); + rb_str_cat(buf, "\n", 1); + } + } + mkd_cleanup(doc); + + return buf; +} + +int rb_rdiscount__get_flags(VALUE ruby_obj) +{ + /* compile flags */ + int flags = MKD_TABSTOP | MKD_NOHEADER; + + /* smart */ + if ( rb_funcall(ruby_obj, rb_intern("smart"), 0) != Qtrue ) + flags = flags | MKD_NOPANTS; + + /* filter_html */ + if ( rb_funcall(ruby_obj, rb_intern("filter_html"), 0) == Qtrue ) + flags = flags | MKD_NOHTML; + + /* generate_toc */ + if ( rb_funcall(ruby_obj, rb_intern("generate_toc"), 0) == Qtrue) + flags = flags | MKD_TOC; + + /* no_image */ + if ( rb_funcall(ruby_obj, rb_intern("no_image"), 0) == Qtrue) + flags = flags | MKD_NOIMAGE; + + /* no_links */ + if ( rb_funcall(ruby_obj, rb_intern("no_links"), 0) == Qtrue) + flags = flags | MKD_NOLINKS; + + /* no_tables */ + if ( rb_funcall(ruby_obj, 
rb_intern("no_tables"), 0) == Qtrue) + flags = flags | MKD_NOTABLES; + + /* strict */ + if ( rb_funcall(ruby_obj, rb_intern("strict"), 0) == Qtrue) + flags = flags | MKD_STRICT; + + /* autolink */ + if ( rb_funcall(ruby_obj, rb_intern("autolink"), 0) == Qtrue) + flags = flags | MKD_AUTOLINK; + + /* safelink */ + if ( rb_funcall(ruby_obj, rb_intern("safelink"), 0) == Qtrue) + flags = flags | MKD_SAFELINK; + + /* no_pseudo_protocols */ + if ( rb_funcall(ruby_obj, rb_intern("no_pseudo_protocols"), 0) == Qtrue) + flags = flags | MKD_NO_EXT; + + + return flags; +} + + +void Init_rdiscount() +{ + rb_cRDiscount = rb_define_class("RDiscount", rb_cObject); + rb_define_method(rb_cRDiscount, "to_html", rb_rdiscount_to_html, -1); + rb_define_method(rb_cRDiscount, "toc_content", rb_rdiscount_toc_content, -1); +} + +/* vim: set ts=4 sw=4: */ diff --git a/test/fixtures/c/redis.c b/test/fixtures/c/redis.c new file mode 100644 index 00000000..b1f83386 --- /dev/null +++ b/test/fixtures/c/redis.c @@ -0,0 +1,2538 @@ +/* + * Copyright (c) 2009-2010, Salvatore Sanfilippo + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of Redis nor the names of its contributors may be used + * to endorse or promote products derived from this software without + * specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "redis.h" +#include "slowlog.h" +#include "bio.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* Our shared "common" objects */ + +struct sharedObjectsStruct shared; + +/* Global vars that are actually used as constants. The following double + * values are used for double on-disk serialization, and are initialized + * at runtime to avoid strange compiler optimizations. */ + +double R_Zero, R_PosInf, R_NegInf, R_Nan; + +/*================================= Globals ================================= */ + +/* Global vars */ +struct redisServer server; /* server global state */ +struct redisCommand *commandTable; + +/* Our command table. + * + * Every entry is composed of the following fields: + * + * name: a string representing the command name. + * function: pointer to the C function implementing the command. + * arity: number of arguments, it is possible to use -N to say >= N + * sflags: command flags as string. See below for a table of flags. + * flags: flags as bitmask. Computed by Redis using the 'sflags' field. 
+ * get_keys_proc: an optional function to get key arguments from a command. + * This is only used when the following three fields are not + * enough to specify what arguments are keys. + * first_key_index: first argument that is a key + * last_key_index: last argument that is a key + * key_step: step to get all the keys from first to last argument. For instance + * in MSET the step is two since arguments are key,val,key,val,... + * microseconds: microseconds of total execution time for this command. + * calls: total number of calls of this command. + * + * The flags, microseconds and calls fields are computed by Redis and should + * always be set to zero. + * + * Command flags are expressed using strings where every character represents + * a flag. Later the populateCommandTable() function will take care of + * populating the real 'flags' field using these characters. + * + * This is the meaning of the flags: + * + * w: write command (may modify the key space). + * r: read command (will never modify the key space). + * m: may increase memory usage once called. Don't allow if out of memory. + * a: admin command, like SAVE or SHUTDOWN. + * p: Pub/Sub related command. + * f: force replication of this command, regardless of server.dirty. + * s: command not allowed in scripts. + * R: random command. Command is not deterministic, that is, the same command + * with the same arguments, with the same key space, may have different + * results. For instance SPOP and RANDOMKEY are two random commands. + * S: Sort command output array if called from script, so that the output + * is deterministic.
+ */ +struct redisCommand redisCommandTable[] = { + {"get",getCommand,2,"r",0,NULL,1,1,1,0,0}, + {"set",setCommand,3,"wm",0,noPreloadGetKeys,1,1,1,0,0}, + {"setnx",setnxCommand,3,"wm",0,noPreloadGetKeys,1,1,1,0,0}, + {"setex",setexCommand,4,"wm",0,noPreloadGetKeys,1,1,1,0,0}, + {"psetex",psetexCommand,4,"wm",0,noPreloadGetKeys,1,1,1,0,0}, + {"append",appendCommand,3,"wm",0,NULL,1,1,1,0,0}, + {"strlen",strlenCommand,2,"r",0,NULL,1,1,1,0,0}, + {"del",delCommand,-2,"w",0,noPreloadGetKeys,1,-1,1,0,0}, + {"exists",existsCommand,2,"r",0,NULL,1,1,1,0,0}, + {"setbit",setbitCommand,4,"wm",0,NULL,1,1,1,0,0}, + {"getbit",getbitCommand,3,"r",0,NULL,1,1,1,0,0}, + {"setrange",setrangeCommand,4,"wm",0,NULL,1,1,1,0,0}, + {"getrange",getrangeCommand,4,"r",0,NULL,1,1,1,0,0}, + {"substr",getrangeCommand,4,"r",0,NULL,1,1,1,0,0}, + {"incr",incrCommand,2,"wm",0,NULL,1,1,1,0,0}, + {"decr",decrCommand,2,"wm",0,NULL,1,1,1,0,0}, + {"mget",mgetCommand,-2,"r",0,NULL,1,-1,1,0,0}, + {"rpush",rpushCommand,-3,"wm",0,NULL,1,1,1,0,0}, + {"lpush",lpushCommand,-3,"wm",0,NULL,1,1,1,0,0}, + {"rpushx",rpushxCommand,3,"wm",0,NULL,1,1,1,0,0}, + {"lpushx",lpushxCommand,3,"wm",0,NULL,1,1,1,0,0}, + {"linsert",linsertCommand,5,"wm",0,NULL,1,1,1,0,0}, + {"rpop",rpopCommand,2,"w",0,NULL,1,1,1,0,0}, + {"lpop",lpopCommand,2,"w",0,NULL,1,1,1,0,0}, + {"brpop",brpopCommand,-3,"ws",0,NULL,1,1,1,0,0}, + {"brpoplpush",brpoplpushCommand,4,"wms",0,NULL,1,2,1,0,0}, + {"blpop",blpopCommand,-3,"ws",0,NULL,1,-2,1,0,0}, + {"llen",llenCommand,2,"r",0,NULL,1,1,1,0,0}, + {"lindex",lindexCommand,3,"r",0,NULL,1,1,1,0,0}, + {"lset",lsetCommand,4,"wm",0,NULL,1,1,1,0,0}, + {"lrange",lrangeCommand,4,"r",0,NULL,1,1,1,0,0}, + {"ltrim",ltrimCommand,4,"w",0,NULL,1,1,1,0,0}, + {"lrem",lremCommand,4,"w",0,NULL,1,1,1,0,0}, + {"rpoplpush",rpoplpushCommand,3,"wm",0,NULL,1,2,1,0,0}, + {"sadd",saddCommand,-3,"wm",0,NULL,1,1,1,0,0}, + {"srem",sremCommand,-3,"w",0,NULL,1,1,1,0,0}, + {"smove",smoveCommand,4,"w",0,NULL,1,2,1,0,0}, + 
{"sismember",sismemberCommand,3,"r",0,NULL,1,1,1,0,0}, + {"scard",scardCommand,2,"r",0,NULL,1,1,1,0,0}, + {"spop",spopCommand,2,"wRs",0,NULL,1,1,1,0,0}, + {"srandmember",srandmemberCommand,2,"rR",0,NULL,1,1,1,0,0}, + {"sinter",sinterCommand,-2,"rS",0,NULL,1,-1,1,0,0}, + {"sinterstore",sinterstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0}, + {"sunion",sunionCommand,-2,"rS",0,NULL,1,-1,1,0,0}, + {"sunionstore",sunionstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0}, + {"sdiff",sdiffCommand,-2,"rS",0,NULL,1,-1,1,0,0}, + {"sdiffstore",sdiffstoreCommand,-3,"wm",0,NULL,1,-1,1,0,0}, + {"smembers",sinterCommand,2,"rS",0,NULL,1,1,1,0,0}, + {"zadd",zaddCommand,-4,"wm",0,NULL,1,1,1,0,0}, + {"zincrby",zincrbyCommand,4,"wm",0,NULL,1,1,1,0,0}, + {"zrem",zremCommand,-3,"w",0,NULL,1,1,1,0,0}, + {"zremrangebyscore",zremrangebyscoreCommand,4,"w",0,NULL,1,1,1,0,0}, + {"zremrangebyrank",zremrangebyrankCommand,4,"w",0,NULL,1,1,1,0,0}, + {"zunionstore",zunionstoreCommand,-4,"wm",0,zunionInterGetKeys,0,0,0,0,0}, + {"zinterstore",zinterstoreCommand,-4,"wm",0,zunionInterGetKeys,0,0,0,0,0}, + {"zrange",zrangeCommand,-4,"r",0,NULL,1,1,1,0,0}, + {"zrangebyscore",zrangebyscoreCommand,-4,"r",0,NULL,1,1,1,0,0}, + {"zrevrangebyscore",zrevrangebyscoreCommand,-4,"r",0,NULL,1,1,1,0,0}, + {"zcount",zcountCommand,4,"r",0,NULL,1,1,1,0,0}, + {"zrevrange",zrevrangeCommand,-4,"r",0,NULL,1,1,1,0,0}, + {"zcard",zcardCommand,2,"r",0,NULL,1,1,1,0,0}, + {"zscore",zscoreCommand,3,"r",0,NULL,1,1,1,0,0}, + {"zrank",zrankCommand,3,"r",0,NULL,1,1,1,0,0}, + {"zrevrank",zrevrankCommand,3,"r",0,NULL,1,1,1,0,0}, + {"hset",hsetCommand,4,"wm",0,NULL,1,1,1,0,0}, + {"hsetnx",hsetnxCommand,4,"wm",0,NULL,1,1,1,0,0}, + {"hget",hgetCommand,3,"r",0,NULL,1,1,1,0,0}, + {"hmset",hmsetCommand,-4,"wm",0,NULL,1,1,1,0,0}, + {"hmget",hmgetCommand,-3,"r",0,NULL,1,1,1,0,0}, + {"hincrby",hincrbyCommand,4,"wm",0,NULL,1,1,1,0,0}, + {"hincrbyfloat",hincrbyfloatCommand,4,"wm",0,NULL,1,1,1,0,0}, + {"hdel",hdelCommand,-3,"w",0,NULL,1,1,1,0,0}, + 
{"hlen",hlenCommand,2,"r",0,NULL,1,1,1,0,0}, + {"hkeys",hkeysCommand,2,"rS",0,NULL,1,1,1,0,0}, + {"hvals",hvalsCommand,2,"rS",0,NULL,1,1,1,0,0}, + {"hgetall",hgetallCommand,2,"r",0,NULL,1,1,1,0,0}, + {"hexists",hexistsCommand,3,"r",0,NULL,1,1,1,0,0}, + {"incrby",incrbyCommand,3,"wm",0,NULL,1,1,1,0,0}, + {"decrby",decrbyCommand,3,"wm",0,NULL,1,1,1,0,0}, + {"incrbyfloat",incrbyfloatCommand,3,"wm",0,NULL,1,1,1,0,0}, + {"getset",getsetCommand,3,"wm",0,NULL,1,1,1,0,0}, + {"mset",msetCommand,-3,"wm",0,NULL,1,-1,2,0,0}, + {"msetnx",msetnxCommand,-3,"wm",0,NULL,1,-1,2,0,0}, + {"randomkey",randomkeyCommand,1,"rR",0,NULL,0,0,0,0,0}, + {"select",selectCommand,2,"r",0,NULL,0,0,0,0,0}, + {"move",moveCommand,3,"w",0,NULL,1,1,1,0,0}, + {"rename",renameCommand,3,"w",0,renameGetKeys,1,2,1,0,0}, + {"renamenx",renamenxCommand,3,"w",0,renameGetKeys,1,2,1,0,0}, + {"expire",expireCommand,3,"w",0,NULL,1,1,1,0,0}, + {"expireat",expireatCommand,3,"w",0,NULL,1,1,1,0,0}, + {"pexpire",pexpireCommand,3,"w",0,NULL,1,1,1,0,0}, + {"pexpireat",pexpireatCommand,3,"w",0,NULL,1,1,1,0,0}, + {"keys",keysCommand,2,"rS",0,NULL,0,0,0,0,0}, + {"dbsize",dbsizeCommand,1,"r",0,NULL,0,0,0,0,0}, + {"auth",authCommand,2,"rs",0,NULL,0,0,0,0,0}, + {"ping",pingCommand,1,"r",0,NULL,0,0,0,0,0}, + {"echo",echoCommand,2,"r",0,NULL,0,0,0,0,0}, + {"save",saveCommand,1,"ars",0,NULL,0,0,0,0,0}, + {"bgsave",bgsaveCommand,1,"ar",0,NULL,0,0,0,0,0}, + {"bgrewriteaof",bgrewriteaofCommand,1,"ar",0,NULL,0,0,0,0,0}, + {"shutdown",shutdownCommand,-1,"ar",0,NULL,0,0,0,0,0}, + {"lastsave",lastsaveCommand,1,"r",0,NULL,0,0,0,0,0}, + {"type",typeCommand,2,"r",0,NULL,1,1,1,0,0}, + {"multi",multiCommand,1,"rs",0,NULL,0,0,0,0,0}, + {"exec",execCommand,1,"s",0,NULL,0,0,0,0,0}, + {"discard",discardCommand,1,"rs",0,NULL,0,0,0,0,0}, + {"sync",syncCommand,1,"ars",0,NULL,0,0,0,0,0}, + {"flushdb",flushdbCommand,1,"w",0,NULL,0,0,0,0,0}, + {"flushall",flushallCommand,1,"w",0,NULL,0,0,0,0,0}, + {"sort",sortCommand,-2,"wmS",0,NULL,1,1,1,0,0}, + 
{"info",infoCommand,-1,"r",0,NULL,0,0,0,0,0}, + {"monitor",monitorCommand,1,"ars",0,NULL,0,0,0,0,0}, + {"ttl",ttlCommand,2,"r",0,NULL,1,1,1,0,0}, + {"pttl",pttlCommand,2,"r",0,NULL,1,1,1,0,0}, + {"persist",persistCommand,2,"w",0,NULL,1,1,1,0,0}, + {"slaveof",slaveofCommand,3,"as",0,NULL,0,0,0,0,0}, + {"debug",debugCommand,-2,"as",0,NULL,0,0,0,0,0}, + {"config",configCommand,-2,"ar",0,NULL,0,0,0,0,0}, + {"subscribe",subscribeCommand,-2,"rps",0,NULL,0,0,0,0,0}, + {"unsubscribe",unsubscribeCommand,-1,"rps",0,NULL,0,0,0,0,0}, + {"psubscribe",psubscribeCommand,-2,"rps",0,NULL,0,0,0,0,0}, + {"punsubscribe",punsubscribeCommand,-1,"rps",0,NULL,0,0,0,0,0}, + {"publish",publishCommand,3,"pf",0,NULL,0,0,0,0,0}, + {"watch",watchCommand,-2,"rs",0,noPreloadGetKeys,1,-1,1,0,0}, + {"unwatch",unwatchCommand,1,"rs",0,NULL,0,0,0,0,0}, + {"cluster",clusterCommand,-2,"ar",0,NULL,0,0,0,0,0}, + {"restore",restoreCommand,4,"awm",0,NULL,1,1,1,0,0}, + {"migrate",migrateCommand,6,"aw",0,NULL,0,0,0,0,0}, + {"asking",askingCommand,1,"r",0,NULL,0,0,0,0,0}, + {"dump",dumpCommand,2,"ar",0,NULL,1,1,1,0,0}, + {"object",objectCommand,-2,"r",0,NULL,2,2,2,0,0}, + {"client",clientCommand,-2,"ar",0,NULL,0,0,0,0,0}, + {"eval",evalCommand,-3,"s",0,zunionInterGetKeys,0,0,0,0,0}, + {"evalsha",evalShaCommand,-3,"s",0,zunionInterGetKeys,0,0,0,0,0}, + {"slowlog",slowlogCommand,-2,"r",0,NULL,0,0,0,0,0}, + {"script",scriptCommand,-2,"ras",0,NULL,0,0,0,0,0}, + {"time",timeCommand,1,"rR",0,NULL,0,0,0,0,0}, + {"bitop",bitopCommand,-4,"wm",0,NULL,2,-1,1,0,0}, + {"bitcount",bitcountCommand,-2,"r",0,NULL,1,1,1,0,0} +}; + +/*============================ Utility functions ============================ */ + +/* Low level logging. To use only for very big messages, otherwise + * redisLog() is to prefer. 
*/ +void redisLogRaw(int level, const char *msg) { + const int syslogLevelMap[] = { LOG_DEBUG, LOG_INFO, LOG_NOTICE, LOG_WARNING }; + const char *c = ".-*#"; + FILE *fp; + char buf[64]; + int rawmode = (level & REDIS_LOG_RAW); +/* Strip the flag bits, then drop messages below the configured verbosity. */ + level &= 0xff; /* clear flags */ + if (level < server.verbosity) return; +/* With no logfile configured the message goes to stdout; otherwise the file is opened in append mode and closed again before returning. */ + fp = (server.logfile == NULL) ? stdout : fopen(server.logfile,"a"); + if (!fp) return; + + if (rawmode) { + fprintf(fp,"%s",msg); + } else { + int off; + struct timeval tv; + + gettimeofday(&tv,NULL); + off = strftime(buf,sizeof(buf),"%d %b %H:%M:%S.",localtime(&tv.tv_sec)); + snprintf(buf+off,sizeof(buf)-off,"%03d",(int)tv.tv_usec/1000); + fprintf(fp,"[%d] %s %c %s\n",(int)getpid(),buf,c[level],msg); + } + fflush(fp); + + if (server.logfile) fclose(fp); +/* NOTE(review): c holds 4 level characters and syslogLevelMap holds 4 entries, so level is presumably always 0..3 after masking -- confirm against callers. */ + if (server.syslog_enabled) syslog(syslogLevelMap[level], "%s", msg); +} + +/* Like redisLogRaw() but with printf-alike support. This is the function that + * is used across the code. The raw version is only used in order to dump + * the INFO output on crash. The message is formatted into a buffer of + * REDIS_MAX_LOGMSG_LEN bytes with vsnprintf(). */ +void redisLog(int level, const char *fmt, ...) { + va_list ap; + char msg[REDIS_MAX_LOGMSG_LEN]; + + if ((level&0xff) < server.verbosity) return; + + va_start(ap, fmt); + vsnprintf(msg, sizeof(msg), fmt, ap); + va_end(ap); + + redisLogRaw(level,msg); +} + +/* Log a fixed message without printf-alike capabilities, in a way that is + * safe to call from a signal handler. + * + * We actually use this only for signals that are not fatal from the point + * of view of Redis. Signals that are going to kill the server anyway and + * where we need printf-alike features are served by redisLog(). */ +void redisLogFromHandler(int level, const char *msg) { + int fd; + char buf[64]; + + if ((level&0xff) < server.verbosity || + (server.logfile == NULL && server.daemonize)) return; + fd = server.logfile ? 
+ open(server.logfile, O_APPEND|O_CREAT|O_WRONLY, 0644) : + STDOUT_FILENO; + if (fd == -1) return; + ll2string(buf,sizeof(buf),getpid()); + if (write(fd,"[",1) == -1) goto err; + if (write(fd,buf,strlen(buf)) == -1) goto err; + if (write(fd," | signal handler] (",20) == -1) goto err; + ll2string(buf,sizeof(buf),time(NULL)); + if (write(fd,buf,strlen(buf)) == -1) goto err; + if (write(fd,") ",2) == -1) goto err; + if (write(fd,msg,strlen(msg)) == -1) goto err; + if (write(fd,"\n",1) == -1) goto err; +err: + if (server.logfile) close(fd); +} + +/* Redis generally does not try to recover from out of memory conditions + * when allocating objects or strings, it is not clear if it will be possible + * to report this condition to the client since the networking layer itself + * is based on heap allocation for send buffers, so we simply abort. + * At least the code will be simpler to read... */ +void oom(const char *msg) { + redisLog(REDIS_WARNING, "%s: Out of memory\n",msg); + sleep(1); + abort(); +} + +/* Return the UNIX time in microseconds (gettimeofday() seconds scaled by 1000000 plus tv_usec) */ +long long ustime(void) { + struct timeval tv; + long long ust; + + gettimeofday(&tv, NULL); + ust = ((long long)tv.tv_sec)*1000000; + ust += tv.tv_usec; + return ust; +} + +/* Return the UNIX time in milliseconds (ustime() divided by 1000) */ +long long mstime(void) { + return ustime()/1000; +} + +/* After an RDB dump or AOF rewrite we exit from children using _exit() instead of + * exit(), because the latter may interact with the same file objects used by + * the parent process. However if we are testing the coverage normal exit() is + * used in order to obtain the right coverage information. */ +void exitFromChild(int retcode) { +#ifdef COVERAGE_TEST + exit(retcode); +#else + _exit(retcode); +#endif +} + +/*====================== Hash table type implementation ==================== */ + +/* This is a hash table type that uses the SDS dynamic strings library as + * keys and Redis objects as values (objects can hold SDS strings, + * lists, sets). 
*/ +/* Value destructor: releases val with zfree(); privdata is unused. */ +void dictVanillaFree(void *privdata, void *val) +{ + DICT_NOTUSED(privdata); + zfree(val); +} +/* Value destructor: releases a list with listRelease(); privdata is unused. */ +void dictListDestructor(void *privdata, void *val) +{ + DICT_NOTUSED(privdata); + listRelease((list*)val); +} +/* Key compare: returns 1 when both sds keys have the same length and the same bytes, 0 otherwise. */ +int dictSdsKeyCompare(void *privdata, const void *key1, + const void *key2) +{ + int l1,l2; + DICT_NOTUSED(privdata); + + l1 = sdslen((sds)key1); + l2 = sdslen((sds)key2); + if (l1 != l2) return 0; + return memcmp(key1, key2, l1) == 0; +} + +/* A case insensitive version used for the command lookup table. */ +int dictSdsKeyCaseCompare(void *privdata, const void *key1, + const void *key2) +{ + DICT_NOTUSED(privdata); + + return strcasecmp(key1, key2) == 0; +} +/* Destructor for Redis object keys or values: drops one reference with decrRefCount(); NULL is ignored. */ +void dictRedisObjectDestructor(void *privdata, void *val) +{ + DICT_NOTUSED(privdata); + + if (val == NULL) return; /* Values of swapped out keys are set to NULL */ + decrRefCount(val); +} +/* Key destructor: frees an sds string with sdsfree(). */ +void dictSdsDestructor(void *privdata, void *val) +{ + DICT_NOTUSED(privdata); + + sdsfree(val); +} +/* Compare two robj keys through the sds strings stored in their ptr fields. */ +int dictObjKeyCompare(void *privdata, const void *key1, + const void *key2) +{ + const robj *o1 = key1, *o2 = key2; + return dictSdsKeyCompare(privdata,o1->ptr,o2->ptr); +} +/* Hash an robj key over the bytes of the sds string in o->ptr. */ +unsigned int dictObjHash(const void *key) { + const robj *o = key; + return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr)); +} +/* Hash an sds key with dictGenHashFunction(). */ +unsigned int dictSdsHash(const void *key) { + return dictGenHashFunction((unsigned char*)key, sdslen((char*)key)); +} +/* Hash an sds key with dictGenCaseHashFunction() (presumably case insensitive -- confirm in dict.c). */ +unsigned int dictSdsCaseHash(const void *key) { + return dictGenCaseHashFunction((unsigned char*)key, sdslen((char*)key)); +} +/* Compare two robj keys that may be integer encoded: two INT-encoded keys are compared by ptr value, otherwise both are decoded and compared as sds strings. */ +int dictEncObjKeyCompare(void *privdata, const void *key1, + const void *key2) +{ + robj *o1 = (robj*) key1, *o2 = (robj*) key2; + int cmp; + + if (o1->encoding == REDIS_ENCODING_INT && + o2->encoding == REDIS_ENCODING_INT) + return o1->ptr == o2->ptr; + + o1 = getDecodedObject(o1); + o2 = getDecodedObject(o2); + cmp = dictSdsKeyCompare(privdata,o1->ptr,o2->ptr); + decrRefCount(o1); + decrRefCount(o2); + return cmp; +} +/* Hash function for robj keys that may be integer encoded. */ +unsigned int dictEncObjHash(const void 
*key) { + robj *o = (robj*) key; +/* RAW strings are hashed as-is; INT-encoded values are hashed through their decimal text from ll2string(), presumably so an integer hashes like the equal string -- confirm; any other encoding is decoded first. */ + if (o->encoding == REDIS_ENCODING_RAW) { + return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr)); + } else { + if (o->encoding == REDIS_ENCODING_INT) { + char buf[32]; + int len; + + len = ll2string(buf,32,(long)o->ptr); + return dictGenHashFunction((unsigned char*)buf, len); + } else { + unsigned int hash; + + o = getDecodedObject(o); + hash = dictGenHashFunction(o->ptr, sdslen((sds)o->ptr)); + decrRefCount(o); + return hash; + } + } +} + +/* Sets type hash table: keys are Redis objects, no value destructor is set. */ +dictType setDictType = { + dictEncObjHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictEncObjKeyCompare, /* key compare */ + dictRedisObjectDestructor, /* key destructor */ + NULL /* val destructor */ +}; + +/* Sorted sets hash (note: a skiplist is used in addition to the hash table) */ +dictType zsetDictType = { + dictEncObjHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictEncObjKeyCompare, /* key compare */ + dictRedisObjectDestructor, /* key destructor */ + NULL /* val destructor */ +}; + +/* Db->dict, keys are sds strings, vals are Redis objects. */ +dictType dbDictType = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + dictSdsDestructor, /* key destructor */ + dictRedisObjectDestructor /* val destructor */ +}; + +/* Db->expires: sds keys, no key or val destructor. NOTE(review): presumably the key strings are owned by Db->dict -- confirm. */ +dictType keyptrDictType = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + NULL, /* key destructor */ + NULL /* val destructor */ +}; + +/* Command table. sds string -> command struct pointer. 
*/ +dictType commandTableDictType = { + dictSdsCaseHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCaseCompare, /* key compare */ + dictSdsDestructor, /* key destructor */ + NULL /* val destructor */ +}; + +/* Hash type hash table (note that small hashes are represented with zipmaps). NOTE(review): the config fields in this file are named hash_max_ziplist_*, so this may mean ziplists -- confirm. */ +dictType hashDictType = { + dictEncObjHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictEncObjKeyCompare, /* key compare */ + dictRedisObjectDestructor, /* key destructor */ + dictRedisObjectDestructor /* val destructor */ +}; + +/* Keylist hash table type has unencoded redis objects as keys and + * lists as values. It's used for blocking operations (BLPOP) and to + * map swapped keys to a list of clients waiting for these keys to be loaded. */ +dictType keylistDictType = { + dictObjHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictObjKeyCompare, /* key compare */ + dictRedisObjectDestructor, /* key destructor */ + dictListDestructor /* val destructor */ +}; + +/* Cluster nodes hash table, mapping nodes addresses 1.2.3.4:6379 to + * clusterNode structures. 
*/ +dictType clusterNodesDictType = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + dictSdsDestructor, /* key destructor */ + NULL /* val destructor */ +}; +/* Return non-zero when the table is non-empty, has more than DICT_HT_INITIAL_SIZE slots and is filled below REDIS_HT_MINFILL percent. */ +int htNeedsResize(dict *dict) { + long long size, used; + + size = dictSlots(dict); + used = dictSize(dict); + return (size && used && size > DICT_HT_INITIAL_SIZE && + (used*100/size < REDIS_HT_MINFILL)); +} + +/* If the percentage of used slots in the HT drops below REDIS_HT_MINFILL + * we resize the hash table to save memory */ +void tryResizeHashTables(void) { + int j; + + for (j = 0; j < server.dbnum; j++) { + if (htNeedsResize(server.db[j].dict)) + dictResize(server.db[j].dict); + if (htNeedsResize(server.db[j].expires)) + dictResize(server.db[j].expires); + } +} + +/* Our hash table implementation performs rehashing incrementally while + * we write/read from the hash table. Still if the server is idle, the hash + * table will use two tables for a long time. So we try to use 1 millisecond + * of CPU time at every serverCron() loop in order to rehash some key. */ +void incrementallyRehash(void) { + int j; + + for (j = 0; j < server.dbnum; j++) { + /* Keys dictionary */ + if (dictIsRehashing(server.db[j].dict)) { + dictRehashMilliseconds(server.db[j].dict,1); + break; /* already used our millisecond for this loop... */ + } + /* Expires */ + if (dictIsRehashing(server.db[j].expires)) { + dictRehashMilliseconds(server.db[j].expires,1); + break; /* already used our millisecond for this loop... */ + } + } +} + +/* This function is called once a background process of some kind terminates, + * as we want to avoid resizing the hash tables when there is a child in order + * to play well with copy-on-write (otherwise when a resize happens lots of + * memory pages are copied). The goal of this function is to update the ability + * for dict.c to resize the hash tables according to whether or not we have + * running children. 
*/ +void updateDictResizePolicy(void) { + if (server.rdb_child_pid == -1 && server.aof_child_pid == -1) + dictEnableResize(); + else + dictDisableResize(); +} + +/* ======================= Cron: called every 100 ms ======================== */ + +/* Try to expire a few timed out keys. The algorithm used is adaptive and + * will use few CPU cycles if there are few expiring keys, otherwise + * it will get more aggressive to avoid that too much memory is used by + * keys that can be removed from the keyspace. */ +void activeExpireCycle(void) { + int j, iteration = 0; + long long start = ustime(), timelimit; + + /* We can use at most REDIS_EXPIRELOOKUPS_TIME_PERC percentage of CPU time + * per iteration. Since this function gets called with a frequency of + * REDIS_HZ times per second, the following is the max amount of + * microseconds we can spend in this function. */ + timelimit = 1000000*REDIS_EXPIRELOOKUPS_TIME_PERC/REDIS_HZ/100; + if (timelimit <= 0) timelimit = 1; + + for (j = 0; j < server.dbnum; j++) { + int expired; + redisDb *db = server.db+j; + + /* Continue to expire if at the end of the cycle more than 25% + * of the sampled keys were expired. */ + do { + unsigned long num = dictSize(db->expires); + unsigned long slots = dictSlots(db->expires); + long long now = mstime(); + + /* When there are less than 1% filled slots getting random + * keys is expensive, so stop here waiting for better times... + * The dictionary will be resized asap. */ + if (num && slots > DICT_HT_INITIAL_SIZE && + (num*100/slots < 1)) break; + + /* The main collection cycle. Sample random keys among keys + * with an expire set, checking for expired ones. 
*/ + expired = 0; + if (num > REDIS_EXPIRELOOKUPS_PER_CRON) + num = REDIS_EXPIRELOOKUPS_PER_CRON; + while (num--) { + dictEntry *de; + long long t; + + if ((de = dictGetRandomKey(db->expires)) == NULL) break; + t = dictGetSignedIntegerVal(de); + if (now > t) { + sds key = dictGetKey(de); + robj *keyobj = createStringObject(key,sdslen(key)); + + propagateExpire(db,keyobj); + dbDelete(db,keyobj); + decrRefCount(keyobj); + expired++; + server.stat_expiredkeys++; + } + } + /* We can't block forever here even if there are many keys to + * expire. So once the time limit (in microseconds) is exceeded return to the + * caller waiting for the other active expire cycle. */ + iteration++; + if ((iteration & 0xf) == 0 && /* check once every 16 cycles. */ + (ustime()-start) > timelimit) return; + } while (expired > REDIS_EXPIRELOOKUPS_PER_CRON/4); + } +} +/* Refresh server.lruclock from the cached server.unixtime, divided by REDIS_LRU_CLOCK_RESOLUTION and masked with REDIS_LRU_CLOCK_MAX. */ +void updateLRUClock(void) { + server.lruclock = (server.unixtime/REDIS_LRU_CLOCK_RESOLUTION) & + REDIS_LRU_CLOCK_MAX; +} + + +/* Add a sample to the operations per second array of samples. */ +void trackOperationsPerSecond(void) { + long long t = mstime() - server.ops_sec_last_sample_time; + long long ops = server.stat_numcommands - server.ops_sec_last_sample_ops; + long long ops_sec; +/* t is in milliseconds, so ops*1000/t is operations per second; 0 is stored when no time has elapsed. */ + ops_sec = t > 0 ? (ops*1000/t) : 0; + + server.ops_sec_samples[server.ops_sec_idx] = ops_sec; + server.ops_sec_idx = (server.ops_sec_idx+1) % REDIS_OPS_SEC_SAMPLES; + server.ops_sec_last_sample_time = mstime(); + server.ops_sec_last_sample_ops = server.stat_numcommands; +} + +/* Return the mean of all the samples. */ +long long getOperationsPerSecond(void) { + int j; + long long sum = 0; + + for (j = 0; j < REDIS_OPS_SEC_SAMPLES; j++) + sum += server.ops_sec_samples[j]; + return sum / REDIS_OPS_SEC_SAMPLES; +} + +/* Check for timeouts. 
Returns non-zero if the client was terminated */ +int clientsCronHandleTimeout(redisClient *c) { + time_t now = server.unixtime; +/* The idle timeout applies only when maxidletime is set and the client is not a slave, master, blocked or pubsub client. A blocked client whose bpop.timeout has passed gets a null multi bulk reply and is unblocked, not freed. */ + if (server.maxidletime && + !(c->flags & REDIS_SLAVE) && /* no timeout for slaves */ + !(c->flags & REDIS_MASTER) && /* no timeout for masters */ + !(c->flags & REDIS_BLOCKED) && /* no timeout for BLPOP */ + dictSize(c->pubsub_channels) == 0 && /* no timeout for pubsub */ + listLength(c->pubsub_patterns) == 0 && + (now - c->lastinteraction > server.maxidletime)) + { + redisLog(REDIS_VERBOSE,"Closing idle client"); + freeClient(c); + return 1; + } else if (c->flags & REDIS_BLOCKED) { + if (c->bpop.timeout != 0 && c->bpop.timeout < now) { + addReply(c,shared.nullmultibulk); + unblockClientWaitingData(c); + } + } + return 0; +} + +/* The client query buffer is an sds.c string that can end with a lot of + * free space not used, this function reclaims space if needed. + * + * The function always returns 0 as it never terminates the client. */ +int clientsCronResizeQueryBuffer(redisClient *c) { + size_t querybuf_size = sdsAllocSize(c->querybuf); + time_t idletime = server.unixtime - c->lastinteraction; + + /* There are two conditions to resize the query buffer: + * 1) Query buffer is > BIG_ARG and too big for latest peak. + * 2) Client is inactive and the buffer is bigger than 1k. */ + if (((querybuf_size > REDIS_MBULK_BIG_ARG) && + (querybuf_size/(c->querybuf_peak+1)) > 2) || + (querybuf_size > 1024 && idletime > 2)) + { + /* Only resize the query buffer if it is actually wasting space. */ + if (sdsavail(c->querybuf) > 1024) { + c->querybuf = sdsRemoveFreeSpace(c->querybuf); + } + } + /* Reset the peak again to capture the peak memory usage in the next + * cycle. */ + c->querybuf_peak = 0; + return 0; +} +/* Run the periodic per-client checks on a bounded number of entries of server.clients. */ +void clientsCron(void) { + /* Make sure to process at least 1/(REDIS_HZ*10) of clients per call. 
+ * Since this function is called REDIS_HZ times per second we are sure that + * in the worst case we process all the clients in 10 seconds. + * In normal conditions (a reasonable number of clients) we process + * all the clients in a shorter time. */ + int numclients = listLength(server.clients); + int iterations = numclients/(REDIS_HZ*10); +/* Never process fewer than 50 clients per call, or all of them when fewer than 50 are connected. */ + if (iterations < 50) + iterations = (numclients < 50) ? numclients : 50; + while(listLength(server.clients) && iterations--) { + redisClient *c; + listNode *head; + + /* Rotate the list, take the current head, process. + * This way if the client must be removed from the list it's the + * first element and we don't incur into O(N) computation. */ + listRotate(server.clients); + head = listFirst(server.clients); + c = listNodeValue(head); + /* The following functions do different service checks on the client. + * The protocol is that they return non-zero if the client was + * terminated. */ + if (clientsCronHandleTimeout(c)) continue; + if (clientsCronResizeQueryBuffer(c)) continue; + } +} + +/* This is our timer interrupt, called REDIS_HZ times per second. + * Here is where we do a number of things that need to be done asynchronously. + * For instance: + * + * - Active expired keys collection (it is also performed in a lazy way on + * lookup). + * - Software watchdog. + * - Update some statistics. + * - Incremental rehashing of the DBs hash tables. + * - Triggering BGSAVE / AOF rewrite, and handling of terminated children. + * - Clients timeout of different kinds. + * - Replication reconnection. + * - Many more... + * + * Everything directly called here will be called REDIS_HZ times per second, + * so in order to throttle execution of things we want to do less frequently + * a macro is used: run_with_period(milliseconds) { .... } + */ + +/* Using the following macro you can run code inside serverCron() with the + * specified period, specified in milliseconds. + * The actual resolution depends on REDIS_HZ. 
*/ +#define run_with_period(_ms_) if (!(loops % ((_ms_)/(1000/REDIS_HZ)))) +/* NOTE(review): the macro expands to a bare if statement and reads the local variable loops. The divisor (_ms_)/(1000/REDIS_HZ) is zero whenever _ms_ is smaller than 1000/REDIS_HZ, so a period shorter than one cron tick would be a modulo by zero -- confirm no caller passes one. */ +int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) { + int j, loops = server.cronloops; + REDIS_NOTUSED(eventLoop); + REDIS_NOTUSED(id); + REDIS_NOTUSED(clientData); + + /* Software watchdog: deliver the SIGALRM that will reach the signal + * handler if we don't return here fast enough. */ + if (server.watchdog_period) watchdogScheduleSignal(server.watchdog_period); + + /* We take a cached value of the unix time in the global state because + * with virtual memory and aging there is the need to store the current time + * in objects at every object access, and accuracy is not needed. + * To access a global var is faster than calling time(NULL) */ + server.unixtime = time(NULL); + + run_with_period(100) trackOperationsPerSecond(); + + /* We have just 22 bits per object for LRU information. + * So we use an (eventually wrapping) LRU clock with 10 seconds resolution. + * 2^22 bits with 10 seconds resolution is more or less 1.5 years. + * + * Note that even if this will wrap after 1.5 years it's not a problem, + * everything will still work but just some object will appear younger + * to Redis. But for this to happen a given object should never be touched + * for 1.5 years. + * + * Note that you can change the resolution altering the + * REDIS_LRU_CLOCK_RESOLUTION define. + */ + updateLRUClock(); + + /* Record the max memory used since the server was started. */ + if (zmalloc_used_memory() > server.stat_peak_memory) + server.stat_peak_memory = zmalloc_used_memory(); + + /* We received a SIGTERM, shutting down here in a safe way, as it is + * not ok doing so inside the signal handler. 
*/ + if (server.shutdown_asap) { + if (prepareForShutdown(0) == REDIS_OK) exit(0); + redisLog(REDIS_WARNING,"SIGTERM received but errors trying to shut down the server, check the logs for more information"); + } +/* NOTE(review): shutdown_asap is not cleared above when prepareForShutdown() fails, so presumably the shutdown is attempted again on every cron tick -- confirm this is intended. */ + /* Show some info about non-empty databases */ + run_with_period(5000) { + for (j = 0; j < server.dbnum; j++) { + long long size, used, vkeys; + + size = dictSlots(server.db[j].dict); + used = dictSize(server.db[j].dict); + vkeys = dictSize(server.db[j].expires); + if (used || vkeys) { + redisLog(REDIS_VERBOSE,"DB %d: %lld keys (%lld volatile) in %lld slots HT.",j,used,vkeys,size); + /* dictPrintStats(server.dict); */ + } + } + } + + /* We don't want to resize the hash tables while a background saving + * is in progress: the saving child is created using fork() that is + * implemented with a copy-on-write semantic in most modern systems, so + * if we resize the HT while there is the saving child at work actually + * a lot of memory movements in the parent will cause a lot of pages + * copied. */ + if (server.rdb_child_pid == -1 && server.aof_child_pid == -1) { + tryResizeHashTables(); + if (server.activerehashing) incrementallyRehash(); + } + + /* Show information about connected clients */ + run_with_period(5000) { + redisLog(REDIS_VERBOSE,"%d clients connected (%d slaves), %zu bytes in use", + listLength(server.clients)-listLength(server.slaves), + listLength(server.slaves), + zmalloc_used_memory()); + } + + /* We need to do a few operations on clients asynchronously. */ + clientsCron(); + + /* Start a scheduled AOF rewrite if this was requested by the user while + * a BGSAVE was in progress. */ + if (server.rdb_child_pid == -1 && server.aof_child_pid == -1 && + server.aof_rewrite_scheduled) + { + rewriteAppendOnlyFileBackground(); + } + + /* Check if a background saving or AOF rewrite in progress terminated. 
*/ + if (server.rdb_child_pid != -1 || server.aof_child_pid != -1) { + int statloc; + pid_t pid; +/* NOTE(review): wait3() returns -1 on error. That value is non-zero and differs from rdb_child_pid, so the rewrite handler below would run with a statloc that was never written -- confirm this path cannot occur. */ + if ((pid = wait3(&statloc,WNOHANG,NULL)) != 0) { + int exitcode = WEXITSTATUS(statloc); + int bysignal = 0; + + if (WIFSIGNALED(statloc)) bysignal = WTERMSIG(statloc); + + if (pid == server.rdb_child_pid) { + backgroundSaveDoneHandler(exitcode,bysignal); + } else { + backgroundRewriteDoneHandler(exitcode,bysignal); + } + updateDictResizePolicy(); + } + } else { + /* If there is not a background saving/rewrite in progress check if + * we have to save/rewrite now */ + for (j = 0; j < server.saveparamslen; j++) { + struct saveparam *sp = server.saveparams+j; +/* Save when at least sp->changes writes are pending and more than sp->seconds have passed since the last save; only the first matching save point is used. */ + if (server.dirty >= sp->changes && + server.unixtime-server.lastsave > sp->seconds) { + redisLog(REDIS_NOTICE,"%d changes in %d seconds. Saving...", + sp->changes, sp->seconds); + rdbSaveBackground(server.rdb_filename); + break; + } + } + + /* Trigger an AOF rewrite when the file is larger than aof_rewrite_min_size and grew by at least aof_rewrite_perc percent over the base size (base 1 when unset) */ + if (server.rdb_child_pid == -1 && + server.aof_child_pid == -1 && + server.aof_rewrite_perc && + server.aof_current_size > server.aof_rewrite_min_size) + { + long long base = server.aof_rewrite_base_size ? + server.aof_rewrite_base_size : 1; + long long growth = (server.aof_current_size*100/base) - 100; + if (growth >= server.aof_rewrite_perc) { + redisLog(REDIS_NOTICE,"Starting automatic rewriting of AOF on %lld%% growth",growth); + rewriteAppendOnlyFileBackground(); + } + } + } + + + /* If we postponed an AOF buffer flush, let's try to do it every time the + * cron function is called. */ + if (server.aof_flush_postponed_start) flushAppendOnlyFile(0); + + /* Expire a few keys per cycle, only if this is a master. + * On slaves we wait for DEL operations synthesized by the master + * in order to guarantee a strict consistency. 
*/ + if (server.masterhost == NULL) activeExpireCycle(); + + /* Close clients that need to be closed asynchronously */ + freeClientsInAsyncFreeQueue(); + + /* Replication cron function -- used to reconnect to master and + * to detect transfer failures. */ + run_with_period(1000) replicationCron(); + + /* Run other sub-systems specific cron jobs */ + run_with_period(1000) { + if (server.cluster_enabled) clusterCron(); + } +/* Count this tick and return 1000/REDIS_HZ, presumably the delay in milliseconds before the event loop calls this function again -- confirm in ae.c. */ + server.cronloops++; + return 1000/REDIS_HZ; +} + +/* This function gets called every time Redis is entering the + * main loop of the event driven library, that is, before sleeping + * for ready file descriptors. */ +void beforeSleep(struct aeEventLoop *eventLoop) { + REDIS_NOTUSED(eventLoop); + listNode *ln; + redisClient *c; + + /* Try to process pending commands for clients that were just unblocked. */ + while (listLength(server.unblocked_clients)) { + ln = listFirst(server.unblocked_clients); + redisAssert(ln != NULL); + c = ln->value; + listDelNode(server.unblocked_clients,ln); + c->flags &= ~REDIS_UNBLOCKED; + + /* Process remaining data in the input buffer. 
*/ + if (c->querybuf && sdslen(c->querybuf) > 0) { + server.current_client = c; + processInputBuffer(c); + server.current_client = NULL; + } + } + + /* Write the AOF buffer to disk */ + flushAppendOnlyFile(0); +} + +/* =========================== Server initialization ======================== */ +/* Build the reply and command string objects stored in the global shared struct. */ +void createSharedObjects(void) { + int j; + + shared.crlf = createObject(REDIS_STRING,sdsnew("\r\n")); + shared.ok = createObject(REDIS_STRING,sdsnew("+OK\r\n")); + shared.err = createObject(REDIS_STRING,sdsnew("-ERR\r\n")); + shared.emptybulk = createObject(REDIS_STRING,sdsnew("$0\r\n\r\n")); + shared.czero = createObject(REDIS_STRING,sdsnew(":0\r\n")); + shared.cone = createObject(REDIS_STRING,sdsnew(":1\r\n")); + shared.cnegone = createObject(REDIS_STRING,sdsnew(":-1\r\n")); + shared.nullbulk = createObject(REDIS_STRING,sdsnew("$-1\r\n")); + shared.nullmultibulk = createObject(REDIS_STRING,sdsnew("*-1\r\n")); + shared.emptymultibulk = createObject(REDIS_STRING,sdsnew("*0\r\n")); + shared.pong = createObject(REDIS_STRING,sdsnew("+PONG\r\n")); + shared.queued = createObject(REDIS_STRING,sdsnew("+QUEUED\r\n")); + shared.wrongtypeerr = createObject(REDIS_STRING,sdsnew( + "-ERR Operation against a key holding the wrong kind of value\r\n")); + shared.nokeyerr = createObject(REDIS_STRING,sdsnew( + "-ERR no such key\r\n")); + shared.syntaxerr = createObject(REDIS_STRING,sdsnew( + "-ERR syntax error\r\n")); + shared.sameobjecterr = createObject(REDIS_STRING,sdsnew( + "-ERR source and destination objects are the same\r\n")); + shared.outofrangeerr = createObject(REDIS_STRING,sdsnew( + "-ERR index out of range\r\n")); + shared.noscripterr = createObject(REDIS_STRING,sdsnew( + "-NOSCRIPT No matching script. Please use EVAL.\r\n")); + shared.loadingerr = createObject(REDIS_STRING,sdsnew( + "-LOADING Redis is loading the dataset in memory\r\n")); + shared.slowscripterr = createObject(REDIS_STRING,sdsnew( + "-BUSY Redis is busy running a script. 
You can only call SCRIPT KILL or SHUTDOWN NOSAVE.\r\n")); + shared.masterdownerr = createObject(REDIS_STRING,sdsnew( + "-MASTERDOWN Link with MASTER is down and slave-serve-stale-data is set to 'no'.\r\n")); + shared.bgsaveerr = createObject(REDIS_STRING,sdsnew( + "-MISCONF Redis is configured to save RDB snapshots, but is currently not able to persist on disk. Commands that may modify the data set are disabled. Please check Redis logs for details about the error.\r\n")); + shared.roslaveerr = createObject(REDIS_STRING,sdsnew( + "-READONLY You can't write against a read only slave.\r\n")); + shared.oomerr = createObject(REDIS_STRING,sdsnew( + "-OOM command not allowed when used memory > 'maxmemory'.\r\n")); + shared.space = createObject(REDIS_STRING,sdsnew(" ")); + shared.colon = createObject(REDIS_STRING,sdsnew(":")); + shared.plus = createObject(REDIS_STRING,sdsnew("+")); + + for (j = 0; j < REDIS_SHARED_SELECT_CMDS; j++) { + shared.select[j] = createObject(REDIS_STRING, + sdscatprintf(sdsempty(),"select %d\r\n", j)); + } + shared.messagebulk = createStringObject("$7\r\nmessage\r\n",13); + shared.pmessagebulk = createStringObject("$8\r\npmessage\r\n",14); + shared.subscribebulk = createStringObject("$9\r\nsubscribe\r\n",15); + shared.unsubscribebulk = createStringObject("$11\r\nunsubscribe\r\n",18); + shared.psubscribebulk = createStringObject("$10\r\npsubscribe\r\n",17); + shared.punsubscribebulk = createStringObject("$12\r\npunsubscribe\r\n",19); + shared.del = createStringObject("DEL",3); + shared.rpop = createStringObject("RPOP",4); + shared.lpop = createStringObject("LPOP",4); + for (j = 0; j < REDIS_SHARED_INTEGERS; j++) { + shared.integers[j] = createObject(REDIS_STRING,(void*)(long)j); + shared.integers[j]->encoding = REDIS_ENCODING_INT; + } + for (j = 0; j < REDIS_SHARED_BULKHDR_LEN; j++) { + shared.mbulkhdr[j] = createObject(REDIS_STRING, + sdscatprintf(sdsempty(),"*%d\r\n",j)); + shared.bulkhdr[j] = createObject(REDIS_STRING, + 
sdscatprintf(sdsempty(),"$%d\r\n",j)); + } +} +/* Fill the global server struct with built-in default values. NOTE(review): presumably runs before the configuration file is parsed -- confirm against the caller. */ +void initServerConfig() { + getRandomHexChars(server.runid,REDIS_RUN_ID_SIZE); + server.runid[REDIS_RUN_ID_SIZE] = '\0'; + server.arch_bits = (sizeof(long) == 8) ? 64 : 32; + server.port = REDIS_SERVERPORT; + server.bindaddr = NULL; + server.unixsocket = NULL; + server.unixsocketperm = 0; + server.ipfd = -1; + server.sofd = -1; + server.dbnum = REDIS_DEFAULT_DBNUM; + server.verbosity = REDIS_NOTICE; + server.maxidletime = REDIS_MAXIDLETIME; + server.client_max_querybuf_len = REDIS_MAX_QUERYBUF_LEN; + server.saveparams = NULL; + server.loading = 0; + server.logfile = NULL; /* NULL = log on standard output */ + server.syslog_enabled = 0; + server.syslog_ident = zstrdup("redis"); + server.syslog_facility = LOG_LOCAL0; + server.daemonize = 0; + server.aof_state = REDIS_AOF_OFF; + server.aof_fsync = AOF_FSYNC_EVERYSEC; + server.aof_no_fsync_on_rewrite = 0; + server.aof_rewrite_perc = REDIS_AOF_REWRITE_PERC; + server.aof_rewrite_min_size = REDIS_AOF_REWRITE_MIN_SIZE; + server.aof_rewrite_base_size = 0; + server.aof_rewrite_scheduled = 0; + server.aof_last_fsync = time(NULL); + server.aof_rewrite_time_last = -1; + server.aof_rewrite_time_start = -1; + server.aof_delayed_fsync = 0; + server.aof_fd = -1; + server.aof_selected_db = -1; /* Make sure the first time will not match */ + server.aof_flush_postponed_start = 0; + server.pidfile = zstrdup("/var/run/redis.pid"); + server.rdb_filename = zstrdup("dump.rdb"); + server.aof_filename = zstrdup("appendonly.aof"); + server.requirepass = NULL; + server.rdb_compression = 1; + server.rdb_checksum = 1; + server.activerehashing = 1; + server.maxclients = REDIS_MAX_CLIENTS; + server.bpop_blocked_clients = 0; + server.maxmemory = 0; + server.maxmemory_policy = REDIS_MAXMEMORY_VOLATILE_LRU; + server.maxmemory_samples = 3; + server.hash_max_ziplist_entries = REDIS_HASH_MAX_ZIPLIST_ENTRIES; + server.hash_max_ziplist_value = REDIS_HASH_MAX_ZIPLIST_VALUE; + server.list_max_ziplist_entries 
= REDIS_LIST_MAX_ZIPLIST_ENTRIES; + server.list_max_ziplist_value = REDIS_LIST_MAX_ZIPLIST_VALUE; + server.set_max_intset_entries = REDIS_SET_MAX_INTSET_ENTRIES; + server.zset_max_ziplist_entries = REDIS_ZSET_MAX_ZIPLIST_ENTRIES; + server.zset_max_ziplist_value = REDIS_ZSET_MAX_ZIPLIST_VALUE; + server.shutdown_asap = 0; + server.repl_ping_slave_period = REDIS_REPL_PING_SLAVE_PERIOD; + server.repl_timeout = REDIS_REPL_TIMEOUT; + server.cluster_enabled = 0; + server.cluster.configfile = zstrdup("nodes.conf"); + server.lua_caller = NULL; + server.lua_time_limit = REDIS_LUA_TIME_LIMIT; + server.lua_client = NULL; + server.lua_timedout = 0; + + updateLRUClock(); + resetServerSaveParams(); + + appendServerSaveParams(60*60,1); /* save after 1 hour and 1 change */ + appendServerSaveParams(300,100); /* save after 5 minutes and 100 changes */ + appendServerSaveParams(60,10000); /* save after 1 minute and 10000 changes */ + /* Replication related */ + server.masterauth = NULL; + server.masterhost = NULL; + server.masterport = 6379; + server.master = NULL; + server.repl_state = REDIS_REPL_NONE; + server.repl_syncio_timeout = REDIS_REPL_SYNCIO_TIMEOUT; + server.repl_serve_stale_data = 1; + server.repl_slave_ro = 1; + server.repl_down_since = time(NULL); + + /* Client output buffer limits */ + server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_NORMAL].hard_limit_bytes = 0; + server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_NORMAL].soft_limit_bytes = 0; + server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_NORMAL].soft_limit_seconds = 0; + server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_SLAVE].hard_limit_bytes = 1024*1024*256; + server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_SLAVE].soft_limit_bytes = 1024*1024*64; + server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_SLAVE].soft_limit_seconds = 60; + server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_PUBSUB].hard_limit_bytes = 1024*1024*32; + server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_PUBSUB].soft_limit_bytes = 
1024*1024*8; + server.client_obuf_limits[REDIS_CLIENT_LIMIT_CLASS_PUBSUB].soft_limit_seconds = 60; + + /* Double constants initialization */ + R_Zero = 0.0; + R_PosInf = 1.0/R_Zero; + R_NegInf = -1.0/R_Zero; + R_Nan = R_Zero/R_Zero; + + /* Command table -- we intiialize it here as it is part of the + * initial configuration, since command names may be changed via + * redis.conf using the rename-command directive. */ + server.commands = dictCreate(&commandTableDictType,NULL); + populateCommandTable(); + server.delCommand = lookupCommandByCString("del"); + server.multiCommand = lookupCommandByCString("multi"); + server.lpushCommand = lookupCommandByCString("lpush"); + + /* Slow log */ + server.slowlog_log_slower_than = REDIS_SLOWLOG_LOG_SLOWER_THAN; + server.slowlog_max_len = REDIS_SLOWLOG_MAX_LEN; + + /* Debugging */ + server.assert_failed = ""; + server.assert_file = ""; + server.assert_line = 0; + server.bug_report_start = 0; + server.watchdog_period = 0; +} + +/* This function will try to raise the max number of open files accordingly to + * the configured max number of clients. It will also account for 32 additional + * file descriptors as we need a few more for persistence, listening + * sockets, log files and so forth. + * + * If it will not be possible to set the limit accordingly to the configured + * max number of clients, the function will do the reverse setting + * server.maxclients to the value that we can actually handle. */ +void adjustOpenFilesLimit(void) { + rlim_t maxfiles = server.maxclients+32; + struct rlimit limit; + + if (getrlimit(RLIMIT_NOFILE,&limit) == -1) { + redisLog(REDIS_WARNING,"Unable to obtain the current NOFILE limit (%s), assuming 1024 and setting the max clients configuration accordingly.", + strerror(errno)); + server.maxclients = 1024-32; + } else { + rlim_t oldlimit = limit.rlim_cur; + + /* Set the max number of files if the current limit is not enough + * for our needs. 
*/ + if (oldlimit < maxfiles) { + rlim_t f; + + f = maxfiles; + while(f > oldlimit) { + limit.rlim_cur = f; + limit.rlim_max = f; + if (setrlimit(RLIMIT_NOFILE,&limit) != -1) break; + f -= 128; + } + if (f < oldlimit) f = oldlimit; + if (f != maxfiles) { + server.maxclients = f-32; + redisLog(REDIS_WARNING,"Unable to set the max number of files limit to %d (%s), setting the max clients configuration to %d.", + (int) maxfiles, strerror(errno), (int) server.maxclients); + } else { + redisLog(REDIS_NOTICE,"Max number of open files set to %d", + (int) maxfiles); + } + } + } +} + +void initServer() { + int j; + + signal(SIGHUP, SIG_IGN); + signal(SIGPIPE, SIG_IGN); + setupSignalHandlers(); + + if (server.syslog_enabled) { + openlog(server.syslog_ident, LOG_PID | LOG_NDELAY | LOG_NOWAIT, + server.syslog_facility); + } + + server.current_client = NULL; + server.clients = listCreate(); + server.clients_to_close = listCreate(); + server.slaves = listCreate(); + server.monitors = listCreate(); + server.unblocked_clients = listCreate(); + + createSharedObjects(); + adjustOpenFilesLimit(); + server.el = aeCreateEventLoop(server.maxclients+1024); + server.db = zmalloc(sizeof(redisDb)*server.dbnum); + + if (server.port != 0) { + server.ipfd = anetTcpServer(server.neterr,server.port,server.bindaddr); + if (server.ipfd == ANET_ERR) { + redisLog(REDIS_WARNING, "Opening port %d: %s", + server.port, server.neterr); + exit(1); + } + } + if (server.unixsocket != NULL) { + unlink(server.unixsocket); /* don't care if this fails */ + server.sofd = anetUnixServer(server.neterr,server.unixsocket,server.unixsocketperm); + if (server.sofd == ANET_ERR) { + redisLog(REDIS_WARNING, "Opening socket: %s", server.neterr); + exit(1); + } + } + if (server.ipfd < 0 && server.sofd < 0) { + redisLog(REDIS_WARNING, "Configured to not listen anywhere, exiting."); + exit(1); + } + for (j = 0; j < server.dbnum; j++) { + server.db[j].dict = dictCreate(&dbDictType,NULL); + server.db[j].expires = 
dictCreate(&keyptrDictType,NULL); + server.db[j].blocking_keys = dictCreate(&keylistDictType,NULL); + server.db[j].watched_keys = dictCreate(&keylistDictType,NULL); + server.db[j].id = j; + } + server.pubsub_channels = dictCreate(&keylistDictType,NULL); + server.pubsub_patterns = listCreate(); + listSetFreeMethod(server.pubsub_patterns,freePubsubPattern); + listSetMatchMethod(server.pubsub_patterns,listMatchPubsubPattern); + server.cronloops = 0; + server.rdb_child_pid = -1; + server.aof_child_pid = -1; + aofRewriteBufferReset(); + server.aof_buf = sdsempty(); + server.lastsave = time(NULL); + server.rdb_save_time_last = -1; + server.rdb_save_time_start = -1; + server.dirty = 0; + server.stat_numcommands = 0; + server.stat_numconnections = 0; + server.stat_expiredkeys = 0; + server.stat_evictedkeys = 0; + server.stat_starttime = time(NULL); + server.stat_keyspace_misses = 0; + server.stat_keyspace_hits = 0; + server.stat_peak_memory = 0; + server.stat_fork_time = 0; + server.stat_rejected_conn = 0; + memset(server.ops_sec_samples,0,sizeof(server.ops_sec_samples)); + server.ops_sec_idx = 0; + server.ops_sec_last_sample_time = mstime(); + server.ops_sec_last_sample_ops = 0; + server.unixtime = time(NULL); + server.lastbgsave_status = REDIS_OK; + server.stop_writes_on_bgsave_err = 1; + aeCreateTimeEvent(server.el, 1, serverCron, NULL, NULL); + if (server.ipfd > 0 && aeCreateFileEvent(server.el,server.ipfd,AE_READABLE, + acceptTcpHandler,NULL) == AE_ERR) oom("creating file event"); + if (server.sofd > 0 && aeCreateFileEvent(server.el,server.sofd,AE_READABLE, + acceptUnixHandler,NULL) == AE_ERR) oom("creating file event"); + + if (server.aof_state == REDIS_AOF_ON) { + server.aof_fd = open(server.aof_filename, + O_WRONLY|O_APPEND|O_CREAT,0644); + if (server.aof_fd == -1) { + redisLog(REDIS_WARNING, "Can't open the append-only file: %s", + strerror(errno)); + exit(1); + } + } + + /* 32 bit instances are limited to 4GB of address space, so if there is + * no explicit limit 
in the user provided configuration we set a limit + * at 3.5GB using maxmemory with 'noeviction' policy'. This saves + * useless crashes of the Redis instance. */ + if (server.arch_bits == 32 && server.maxmemory == 0) { + redisLog(REDIS_WARNING,"Warning: 32 bit instance detected but no memory limit set. Setting 3.5 GB maxmemory limit with 'noeviction' policy now."); + server.maxmemory = 3584LL*(1024*1024); /* 3584 MB = 3.5 GB */ + server.maxmemory_policy = REDIS_MAXMEMORY_NO_EVICTION; + } + + if (server.cluster_enabled) clusterInit(); + scriptingInit(); + slowlogInit(); + bioInit(); +} + +/* Populates the Redis Command Table starting from the hard coded list + * we have on top of redis.c file. */ +void populateCommandTable(void) { + int j; + int numcommands = sizeof(redisCommandTable)/sizeof(struct redisCommand); + + for (j = 0; j < numcommands; j++) { + struct redisCommand *c = redisCommandTable+j; + char *f = c->sflags; + int retval; + + while(*f != '\0') { + switch(*f) { + case 'w': c->flags |= REDIS_CMD_WRITE; break; + case 'r': c->flags |= REDIS_CMD_READONLY; break; + case 'm': c->flags |= REDIS_CMD_DENYOOM; break; + case 'a': c->flags |= REDIS_CMD_ADMIN; break; + case 'p': c->flags |= REDIS_CMD_PUBSUB; break; + case 'f': c->flags |= REDIS_CMD_FORCE_REPLICATION; break; + case 's': c->flags |= REDIS_CMD_NOSCRIPT; break; + case 'R': c->flags |= REDIS_CMD_RANDOM; break; + case 'S': c->flags |= REDIS_CMD_SORT_FOR_SCRIPT; break; + default: redisPanic("Unsupported command flag"); break; + } + f++; + } + + retval = dictAdd(server.commands, sdsnew(c->name), c); + assert(retval == DICT_OK); + } +} + +void resetCommandTableStats(void) { + int numcommands = sizeof(redisCommandTable)/sizeof(struct redisCommand); + int j; + + for (j = 0; j < numcommands; j++) { + struct redisCommand *c = redisCommandTable+j; + + c->microseconds = 0; + c->calls = 0; + } +} + +/* ========================== Redis OP Array API ============================ */ + +void 
redisOpArrayInit(redisOpArray *oa) { + oa->ops = NULL; + oa->numops = 0; +} + +int redisOpArrayAppend(redisOpArray *oa, struct redisCommand *cmd, int dbid, + robj **argv, int argc, int target) +{ + redisOp *op; + + oa->ops = zrealloc(oa->ops,sizeof(redisOp)*(oa->numops+1)); + op = oa->ops+oa->numops; + op->cmd = cmd; + op->dbid = dbid; + op->argv = argv; + op->argc = argc; + op->target = target; + oa->numops++; + return oa->numops; +} + +void redisOpArrayFree(redisOpArray *oa) { + while(oa->numops) { + int j; + redisOp *op; + + oa->numops--; + op = oa->ops+oa->numops; + for (j = 0; j < op->argc; j++) + decrRefCount(op->argv[j]); + zfree(op->argv); + } + zfree(oa->ops); +} + +/* ====================== Commands lookup and execution ===================== */ + +struct redisCommand *lookupCommand(sds name) { + return dictFetchValue(server.commands, name); +} + +struct redisCommand *lookupCommandByCString(char *s) { + struct redisCommand *cmd; + sds name = sdsnew(s); + + cmd = dictFetchValue(server.commands, name); + sdsfree(name); + return cmd; +} + +/* Propagate the specified command (in the context of the specified database id) + * to AOF, Slaves and Monitors. + * + * flags are an xor between: + * + REDIS_PROPAGATE_NONE (no propagation of command at all) + * + REDIS_PROPAGATE_AOF (propagate into the AOF file if is enabled) + * + REDIS_PROPAGATE_REPL (propagate into the replication link) + */ +void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, + int flags) +{ + if (server.aof_state != REDIS_AOF_OFF && flags & REDIS_PROPAGATE_AOF) + feedAppendOnlyFile(cmd,dbid,argv,argc); + if (flags & REDIS_PROPAGATE_REPL && listLength(server.slaves)) + replicationFeedSlaves(server.slaves,dbid,argv,argc); +} + +/* Used inside commands to schedule the propagation of additional commands + * after the current command is propagated to AOF / Replication. 
*/ +void alsoPropagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, + int target) +{ + redisOpArrayAppend(&server.also_propagate,cmd,dbid,argv,argc,target); +} + +/* Call() is the core of Redis execution of a command */ +void call(redisClient *c, int flags) { + long long dirty, start = ustime(), duration; + + /* Sent the command to clients in MONITOR mode, only if the commands are + * not geneated from reading an AOF. */ + if (listLength(server.monitors) && !server.loading) + replicationFeedMonitors(c,server.monitors,c->db->id,c->argv,c->argc); + + /* Call the command. */ + redisOpArrayInit(&server.also_propagate); + dirty = server.dirty; + c->cmd->proc(c); + dirty = server.dirty-dirty; + duration = ustime()-start; + + /* When EVAL is called loading the AOF we don't want commands called + * from Lua to go into the slowlog or to populate statistics. */ + if (server.loading && c->flags & REDIS_LUA_CLIENT) + flags &= ~(REDIS_CALL_SLOWLOG | REDIS_CALL_STATS); + + /* Log the command into the Slow log if needed, and populate the + * per-command statistics that we show in INFO commandstats. */ + if (flags & REDIS_CALL_SLOWLOG) + slowlogPushEntryIfNeeded(c->argv,c->argc,duration); + if (flags & REDIS_CALL_STATS) { + c->cmd->microseconds += duration; + c->cmd->calls++; + } + + /* Propagate the command into the AOF and replication link */ + if (flags & REDIS_CALL_PROPAGATE) { + int flags = REDIS_PROPAGATE_NONE; + + if (c->cmd->flags & REDIS_CMD_FORCE_REPLICATION) + flags |= REDIS_PROPAGATE_REPL; + if (dirty) + flags |= (REDIS_PROPAGATE_REPL | REDIS_PROPAGATE_AOF); + if (flags != REDIS_PROPAGATE_NONE) + propagate(c->cmd,c->db->id,c->argv,c->argc,flags); + } + /* Commands such as LPUSH or BRPOPLPUSH may propagate an additional + * PUSH command. 
*/ + if (server.also_propagate.numops) { + int j; + redisOp *rop; + + for (j = 0; j < server.also_propagate.numops; j++) { + rop = &server.also_propagate.ops[j]; + propagate(rop->cmd, rop->dbid, rop->argv, rop->argc, rop->target); + } + redisOpArrayFree(&server.also_propagate); + } + server.stat_numcommands++; +} + +/* If this function gets called we already read a whole + * command, argments are in the client argv/argc fields. + * processCommand() execute the command or prepare the + * server for a bulk read from the client. + * + * If 1 is returned the client is still alive and valid and + * and other operations can be performed by the caller. Otherwise + * if 0 is returned the client was destroied (i.e. after QUIT). */ +int processCommand(redisClient *c) { + /* The QUIT command is handled separately. Normal command procs will + * go through checking for replication and QUIT will cause trouble + * when FORCE_REPLICATION is enabled and would be implemented in + * a regular command proc. */ + if (!strcasecmp(c->argv[0]->ptr,"quit")) { + addReply(c,shared.ok); + c->flags |= REDIS_CLOSE_AFTER_REPLY; + return REDIS_ERR; + } + + /* Now lookup the command and check ASAP about trivial error conditions + * such as wrong arity, bad command name and so forth. 
*/ + c->cmd = c->lastcmd = lookupCommand(c->argv[0]->ptr); + if (!c->cmd) { + addReplyErrorFormat(c,"unknown command '%s'", + (char*)c->argv[0]->ptr); + return REDIS_OK; + } else if ((c->cmd->arity > 0 && c->cmd->arity != c->argc) || + (c->argc < -c->cmd->arity)) { + addReplyErrorFormat(c,"wrong number of arguments for '%s' command", + c->cmd->name); + return REDIS_OK; + } + + /* Check if the user is authenticated */ + if (server.requirepass && !c->authenticated && c->cmd->proc != authCommand) + { + addReplyError(c,"operation not permitted"); + return REDIS_OK; + } + + /* If cluster is enabled, redirect here */ + if (server.cluster_enabled && + !(c->cmd->getkeys_proc == NULL && c->cmd->firstkey == 0)) { + int hashslot; + + if (server.cluster.state != REDIS_CLUSTER_OK) { + addReplyError(c,"The cluster is down. Check with CLUSTER INFO for more information"); + return REDIS_OK; + } else { + int ask; + clusterNode *n = getNodeByQuery(c,c->cmd,c->argv,c->argc,&hashslot,&ask); + if (n == NULL) { + addReplyError(c,"Multi keys request invalid in cluster"); + return REDIS_OK; + } else if (n != server.cluster.myself) { + addReplySds(c,sdscatprintf(sdsempty(), + "-%s %d %s:%d\r\n", ask ? "ASK" : "MOVED", + hashslot,n->ip,n->port)); + return REDIS_OK; + } + } + } + + /* Handle the maxmemory directive. + * + * First we try to free some memory if possible (if there are volatile + * keys in the dataset). If there are not the only thing we can do + * is returning an error. */ + if (server.maxmemory) { + int retval = freeMemoryIfNeeded(); + if ((c->cmd->flags & REDIS_CMD_DENYOOM) && retval == REDIS_ERR) { + addReply(c, shared.oomerr); + return REDIS_OK; + } + } + + /* Don't accept write commands if there are problems persisting on disk. 
*/ + if (server.stop_writes_on_bgsave_err && + server.saveparamslen > 0 + && server.lastbgsave_status == REDIS_ERR && + c->cmd->flags & REDIS_CMD_WRITE) + { + addReply(c, shared.bgsaveerr); + return REDIS_OK; + } + + /* Don't accept wirte commands if this is a read only slave. But + * accept write commands if this is our master. */ + if (server.masterhost && server.repl_slave_ro && + !(c->flags & REDIS_MASTER) && + c->cmd->flags & REDIS_CMD_WRITE) + { + addReply(c, shared.roslaveerr); + return REDIS_OK; + } + + /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */ + if ((dictSize(c->pubsub_channels) > 0 || listLength(c->pubsub_patterns) > 0) + && + c->cmd->proc != subscribeCommand && + c->cmd->proc != unsubscribeCommand && + c->cmd->proc != psubscribeCommand && + c->cmd->proc != punsubscribeCommand) { + addReplyError(c,"only (P)SUBSCRIBE / (P)UNSUBSCRIBE / QUIT allowed in this context"); + return REDIS_OK; + } + + /* Only allow INFO and SLAVEOF when slave-serve-stale-data is no and + * we are a slave with a broken link with master. */ + if (server.masterhost && server.repl_state != REDIS_REPL_CONNECTED && + server.repl_serve_stale_data == 0 && + c->cmd->proc != infoCommand && c->cmd->proc != slaveofCommand) + { + addReply(c, shared.masterdownerr); + return REDIS_OK; + } + + /* Loading DB? Return an error if the command is not INFO */ + if (server.loading && c->cmd->proc != infoCommand) { + addReply(c, shared.loadingerr); + return REDIS_OK; + } + + /* Lua script too slow? Only allow SHUTDOWN NOSAVE and SCRIPT KILL. 
*/ + if (server.lua_timedout && + !(c->cmd->proc == shutdownCommand && + c->argc == 2 && + tolower(((char*)c->argv[1]->ptr)[0]) == 'n') && + !(c->cmd->proc == scriptCommand && + c->argc == 2 && + tolower(((char*)c->argv[1]->ptr)[0]) == 'k')) + { + addReply(c, shared.slowscripterr); + return REDIS_OK; + } + + /* Exec the command */ + if (c->flags & REDIS_MULTI && + c->cmd->proc != execCommand && c->cmd->proc != discardCommand && + c->cmd->proc != multiCommand && c->cmd->proc != watchCommand) + { + queueMultiCommand(c); + addReply(c,shared.queued); + } else { + call(c,REDIS_CALL_FULL); + } + return REDIS_OK; +} + +/*================================== Shutdown =============================== */ + +int prepareForShutdown(int flags) { + int save = flags & REDIS_SHUTDOWN_SAVE; + int nosave = flags & REDIS_SHUTDOWN_NOSAVE; + + redisLog(REDIS_WARNING,"User requested shutdown..."); + /* Kill the saving child if there is a background saving in progress. + We want to avoid race conditions, for instance our saving child may + overwrite the synchronous saving did by SHUTDOWN. */ + if (server.rdb_child_pid != -1) { + redisLog(REDIS_WARNING,"There is a child saving an .rdb. Killing it!"); + kill(server.rdb_child_pid,SIGKILL); + rdbRemoveTempFile(server.rdb_child_pid); + } + if (server.aof_state != REDIS_AOF_OFF) { + /* Kill the AOF saving child as the AOF we already have may be longer + * but contains the full dataset anyway. */ + if (server.aof_child_pid != -1) { + redisLog(REDIS_WARNING, + "There is a child rewriting the AOF. Killing it!"); + kill(server.aof_child_pid,SIGKILL); + } + /* Append only file: fsync() the AOF and exit */ + redisLog(REDIS_NOTICE,"Calling fsync() on the AOF file."); + aof_fsync(server.aof_fd); + } + if ((server.saveparamslen > 0 && !nosave) || save) { + redisLog(REDIS_NOTICE,"Saving the final RDB snapshot before exiting."); + /* Snapshotting. Perform a SYNC SAVE and exit */ + if (rdbSave(server.rdb_filename) != REDIS_OK) { + /* Ooops.. error saving! 
The best we can do is to continue + * operating. Note that if there was a background saving process, + * in the next cron() Redis will be notified that the background + * saving aborted, handling special stuff like slaves pending for + * synchronization... */ + redisLog(REDIS_WARNING,"Error trying to save the DB, can't exit."); + return REDIS_ERR; + } + } + if (server.daemonize) { + redisLog(REDIS_NOTICE,"Removing the pid file."); + unlink(server.pidfile); + } + /* Close the listening sockets. Apparently this allows faster restarts. */ + if (server.ipfd != -1) close(server.ipfd); + if (server.sofd != -1) close(server.sofd); + if (server.unixsocket) { + redisLog(REDIS_NOTICE,"Removing the unix socket file."); + unlink(server.unixsocket); /* don't care if this fails */ + } + + redisLog(REDIS_WARNING,"Redis is now ready to exit, bye bye..."); + return REDIS_OK; +} + +/*================================== Commands =============================== */ + +void authCommand(redisClient *c) { + if (!server.requirepass) { + addReplyError(c,"Client sent AUTH, but no password is set"); + } else if (!strcmp(c->argv[1]->ptr, server.requirepass)) { + c->authenticated = 1; + addReply(c,shared.ok); + } else { + c->authenticated = 0; + addReplyError(c,"invalid password"); + } +} + +void pingCommand(redisClient *c) { + addReply(c,shared.pong); +} + +void echoCommand(redisClient *c) { + addReplyBulk(c,c->argv[1]); +} + +void timeCommand(redisClient *c) { + struct timeval tv; + + /* gettimeofday() can only fail if &tv is a bad addresss so we + * don't check for errors. */ + gettimeofday(&tv,NULL); + addReplyMultiBulkLen(c,2); + addReplyBulkLongLong(c,tv.tv_sec); + addReplyBulkLongLong(c,tv.tv_usec); +} + +/* Convert an amount of bytes into a human readable string in the form + * of 100B, 2G, 100M, 4K, and so forth. 
*/ +void bytesToHuman(char *s, unsigned long long n) { + double d; + + if (n < 1024) { + /* Bytes */ + sprintf(s,"%lluB",n); + return; + } else if (n < (1024*1024)) { + d = (double)n/(1024); + sprintf(s,"%.2fK",d); + } else if (n < (1024LL*1024*1024)) { + d = (double)n/(1024*1024); + sprintf(s,"%.2fM",d); + } else if (n < (1024LL*1024*1024*1024)) { + d = (double)n/(1024LL*1024*1024); + sprintf(s,"%.2fG",d); + } +} + +/* Create the string returned by the INFO command. This is decoupled + * by the INFO command itself as we need to report the same information + * on memory corruption problems. */ +sds genRedisInfoString(char *section) { + sds info = sdsempty(); + time_t uptime = server.unixtime-server.stat_starttime; + int j, numcommands; + struct rusage self_ru, c_ru; + unsigned long lol, bib; + int allsections = 0, defsections = 0; + int sections = 0; + + if (section) { + allsections = strcasecmp(section,"all") == 0; + defsections = strcasecmp(section,"default") == 0; + } + + getrusage(RUSAGE_SELF, &self_ru); + getrusage(RUSAGE_CHILDREN, &c_ru); + getClientsMaxBuffers(&lol,&bib); + + /* Server */ + if (allsections || defsections || !strcasecmp(section,"server")) { + struct utsname name; + + if (sections++) info = sdscat(info,"\r\n"); + uname(&name); + info = sdscatprintf(info, + "# Server\r\n" + "redis_version:%s\r\n" + "redis_git_sha1:%s\r\n" + "redis_git_dirty:%d\r\n" + "os:%s %s %s\r\n" + "arch_bits:%d\r\n" + "multiplexing_api:%s\r\n" + "gcc_version:%d.%d.%d\r\n" + "process_id:%ld\r\n" + "run_id:%s\r\n" + "tcp_port:%d\r\n" + "uptime_in_seconds:%ld\r\n" + "uptime_in_days:%ld\r\n" + "lru_clock:%ld\r\n", + REDIS_VERSION, + redisGitSHA1(), + strtol(redisGitDirty(),NULL,10) > 0, + name.sysname, name.release, name.machine, + server.arch_bits, + aeGetApiName(), +#ifdef __GNUC__ + __GNUC__,__GNUC_MINOR__,__GNUC_PATCHLEVEL__, +#else + 0,0,0, +#endif + (long) getpid(), + server.runid, + server.port, + uptime, + uptime/(3600*24), + (unsigned long) server.lruclock); + } + + 
/* Clients */ + if (allsections || defsections || !strcasecmp(section,"clients")) { + if (sections++) info = sdscat(info,"\r\n"); + info = sdscatprintf(info, + "# Clients\r\n" + "connected_clients:%lu\r\n" + "client_longest_output_list:%lu\r\n" + "client_biggest_input_buf:%lu\r\n" + "blocked_clients:%d\r\n", + listLength(server.clients)-listLength(server.slaves), + lol, bib, + server.bpop_blocked_clients); + } + + /* Memory */ + if (allsections || defsections || !strcasecmp(section,"memory")) { + char hmem[64]; + char peak_hmem[64]; + + bytesToHuman(hmem,zmalloc_used_memory()); + bytesToHuman(peak_hmem,server.stat_peak_memory); + if (sections++) info = sdscat(info,"\r\n"); + info = sdscatprintf(info, + "# Memory\r\n" + "used_memory:%zu\r\n" + "used_memory_human:%s\r\n" + "used_memory_rss:%zu\r\n" + "used_memory_peak:%zu\r\n" + "used_memory_peak_human:%s\r\n" + "used_memory_lua:%lld\r\n" + "mem_fragmentation_ratio:%.2f\r\n" + "mem_allocator:%s\r\n", + zmalloc_used_memory(), + hmem, + zmalloc_get_rss(), + server.stat_peak_memory, + peak_hmem, + ((long long)lua_gc(server.lua,LUA_GCCOUNT,0))*1024LL, + zmalloc_get_fragmentation_ratio(), + ZMALLOC_LIB + ); + } + + /* Persistence */ + if (allsections || defsections || !strcasecmp(section,"persistence")) { + if (sections++) info = sdscat(info,"\r\n"); + info = sdscatprintf(info, + "# Persistence\r\n" + "loading:%d\r\n" + "rdb_changes_since_last_save:%lld\r\n" + "rdb_bgsave_in_progress:%d\r\n" + "rdb_last_save_time:%ld\r\n" + "rdb_last_bgsave_status:%s\r\n" + "rdb_last_bgsave_time_sec:%ld\r\n" + "rdb_current_bgsave_time_sec:%ld\r\n" + "aof_enabled:%d\r\n" + "aof_rewrite_in_progress:%d\r\n" + "aof_rewrite_scheduled:%d\r\n" + "aof_last_rewrite_time_sec:%ld\r\n" + "aof_current_rewrite_time_sec:%ld\r\n", + server.loading, + server.dirty, + server.rdb_child_pid != -1, + server.lastsave, + server.lastbgsave_status == REDIS_OK ? "ok" : "err", + server.rdb_save_time_last, + (server.rdb_child_pid == -1) ? 
+ -1 : time(NULL)-server.rdb_save_time_start, + server.aof_state != REDIS_AOF_OFF, + server.aof_child_pid != -1, + server.aof_rewrite_scheduled, + server.aof_rewrite_time_last, + (server.aof_child_pid == -1) ? + -1 : time(NULL)-server.aof_rewrite_time_start); + + if (server.aof_state != REDIS_AOF_OFF) { + info = sdscatprintf(info, + "aof_current_size:%lld\r\n" + "aof_base_size:%lld\r\n" + "aof_pending_rewrite:%d\r\n" + "aof_buffer_length:%zu\r\n" + "aof_rewrite_buffer_length:%zu\r\n" + "aof_pending_bio_fsync:%llu\r\n" + "aof_delayed_fsync:%lu\r\n", + (long long) server.aof_current_size, + (long long) server.aof_rewrite_base_size, + server.aof_rewrite_scheduled, + sdslen(server.aof_buf), + aofRewriteBufferSize(), + bioPendingJobsOfType(REDIS_BIO_AOF_FSYNC), + server.aof_delayed_fsync); + } + + if (server.loading) { + double perc; + time_t eta, elapsed; + off_t remaining_bytes = server.loading_total_bytes- + server.loading_loaded_bytes; + + perc = ((double)server.loading_loaded_bytes / + server.loading_total_bytes) * 100; + + elapsed = server.unixtime-server.loading_start_time; + if (elapsed == 0) { + eta = 1; /* A fake 1 second figure if we don't have + enough info */ + } else { + eta = (elapsed*remaining_bytes)/server.loading_loaded_bytes; + } + + info = sdscatprintf(info, + "loading_start_time:%ld\r\n" + "loading_total_bytes:%llu\r\n" + "loading_loaded_bytes:%llu\r\n" + "loading_loaded_perc:%.2f\r\n" + "loading_eta_seconds:%ld\r\n" + ,(unsigned long) server.loading_start_time, + (unsigned long long) server.loading_total_bytes, + (unsigned long long) server.loading_loaded_bytes, + perc, + eta + ); + } + } + + /* Stats */ + if (allsections || defsections || !strcasecmp(section,"stats")) { + if (sections++) info = sdscat(info,"\r\n"); + info = sdscatprintf(info, + "# Stats\r\n" + "total_connections_received:%lld\r\n" + "total_commands_processed:%lld\r\n" + "instantaneous_ops_per_sec:%lld\r\n" + "rejected_connections:%lld\r\n" + "expired_keys:%lld\r\n" + 
"evicted_keys:%lld\r\n" + "keyspace_hits:%lld\r\n" + "keyspace_misses:%lld\r\n" + "pubsub_channels:%ld\r\n" + "pubsub_patterns:%lu\r\n" + "latest_fork_usec:%lld\r\n", + server.stat_numconnections, + server.stat_numcommands, + getOperationsPerSecond(), + server.stat_rejected_conn, + server.stat_expiredkeys, + server.stat_evictedkeys, + server.stat_keyspace_hits, + server.stat_keyspace_misses, + dictSize(server.pubsub_channels), + listLength(server.pubsub_patterns), + server.stat_fork_time); + } + + /* Replication */ + if (allsections || defsections || !strcasecmp(section,"replication")) { + if (sections++) info = sdscat(info,"\r\n"); + info = sdscatprintf(info, + "# Replication\r\n" + "role:%s\r\n", + server.masterhost == NULL ? "master" : "slave"); + if (server.masterhost) { + info = sdscatprintf(info, + "master_host:%s\r\n" + "master_port:%d\r\n" + "master_link_status:%s\r\n" + "master_last_io_seconds_ago:%d\r\n" + "master_sync_in_progress:%d\r\n" + ,server.masterhost, + server.masterport, + (server.repl_state == REDIS_REPL_CONNECTED) ? + "up" : "down", + server.master ? 
+ ((int)(server.unixtime-server.master->lastinteraction)) : -1, + server.repl_state == REDIS_REPL_TRANSFER + ); + + if (server.repl_state == REDIS_REPL_TRANSFER) { + info = sdscatprintf(info, + "master_sync_left_bytes:%ld\r\n" + "master_sync_last_io_seconds_ago:%d\r\n" + ,(long)server.repl_transfer_left, + (int)(server.unixtime-server.repl_transfer_lastio) + ); + } + + if (server.repl_state != REDIS_REPL_CONNECTED) { + info = sdscatprintf(info, + "master_link_down_since_seconds:%ld\r\n", + (long)server.unixtime-server.repl_down_since); + } + } + info = sdscatprintf(info, + "connected_slaves:%lu\r\n", + listLength(server.slaves)); + if (listLength(server.slaves)) { + int slaveid = 0; + listNode *ln; + listIter li; + + listRewind(server.slaves,&li); + while((ln = listNext(&li))) { + redisClient *slave = listNodeValue(ln); + char *state = NULL; + char ip[32]; + int port; + + if (anetPeerToString(slave->fd,ip,&port) == -1) continue; + switch(slave->replstate) { + case REDIS_REPL_WAIT_BGSAVE_START: + case REDIS_REPL_WAIT_BGSAVE_END: + state = "wait_bgsave"; + break; + case REDIS_REPL_SEND_BULK: + state = "send_bulk"; + break; + case REDIS_REPL_ONLINE: + state = "online"; + break; + } + if (state == NULL) continue; + info = sdscatprintf(info,"slave%d:%s,%d,%s\r\n", + slaveid,ip,port,state); + slaveid++; + } + } + } + + /* CPU */ + if (allsections || defsections || !strcasecmp(section,"cpu")) { + if (sections++) info = sdscat(info,"\r\n"); + info = sdscatprintf(info, + "# CPU\r\n" + "used_cpu_sys:%.2f\r\n" + "used_cpu_user:%.2f\r\n" + "used_cpu_sys_children:%.2f\r\n" + "used_cpu_user_children:%.2f\r\n", + (float)self_ru.ru_stime.tv_sec+(float)self_ru.ru_stime.tv_usec/1000000, + (float)self_ru.ru_utime.tv_sec+(float)self_ru.ru_utime.tv_usec/1000000, + (float)c_ru.ru_stime.tv_sec+(float)c_ru.ru_stime.tv_usec/1000000, + (float)c_ru.ru_utime.tv_sec+(float)c_ru.ru_utime.tv_usec/1000000); + } + + /* cmdtime */ + if (allsections || !strcasecmp(section,"commandstats")) { + if 
(sections++) info = sdscat(info,"\r\n"); + info = sdscatprintf(info, "# Commandstats\r\n"); + numcommands = sizeof(redisCommandTable)/sizeof(struct redisCommand); + for (j = 0; j < numcommands; j++) { + struct redisCommand *c = redisCommandTable+j; + + if (!c->calls) continue; + info = sdscatprintf(info, + "cmdstat_%s:calls=%lld,usec=%lld,usec_per_call=%.2f\r\n", + c->name, c->calls, c->microseconds, + (c->calls == 0) ? 0 : ((float)c->microseconds/c->calls)); + } + } + + /* Cluster */ + if (allsections || defsections || !strcasecmp(section,"cluster")) { + if (sections++) info = sdscat(info,"\r\n"); + info = sdscatprintf(info, + "# Cluster\r\n" + "cluster_enabled:%d\r\n", + server.cluster_enabled); + } + + /* Key space */ + if (allsections || defsections || !strcasecmp(section,"keyspace")) { + if (sections++) info = sdscat(info,"\r\n"); + info = sdscatprintf(info, "# Keyspace\r\n"); + for (j = 0; j < server.dbnum; j++) { + long long keys, vkeys; + + keys = dictSize(server.db[j].dict); + vkeys = dictSize(server.db[j].expires); + if (keys || vkeys) { + info = sdscatprintf(info, "db%d:keys=%lld,expires=%lld\r\n", + j, keys, vkeys); + } + } + } + return info; +} + +void infoCommand(redisClient *c) { + char *section = c->argc == 2 ? 
c->argv[1]->ptr : "default"; + + if (c->argc > 2) { + addReply(c,shared.syntaxerr); + return; + } + sds info = genRedisInfoString(section); + addReplySds(c,sdscatprintf(sdsempty(),"$%lu\r\n", + (unsigned long)sdslen(info))); + addReplySds(c,info); + addReply(c,shared.crlf); +} + +void monitorCommand(redisClient *c) { + /* ignore MONITOR if aleady slave or in monitor mode */ + if (c->flags & REDIS_SLAVE) return; + + c->flags |= (REDIS_SLAVE|REDIS_MONITOR); + c->slaveseldb = 0; + listAddNodeTail(server.monitors,c); + addReply(c,shared.ok); +} + +/* ============================ Maxmemory directive ======================== */ + +/* This function gets called when 'maxmemory' is set on the config file to limit + * the max memory used by the server, before processing a command. + * + * The goal of the function is to free enough memory to keep Redis under the + * configured memory limit. + * + * The function starts calculating how many bytes should be freed to keep + * Redis under the limit, and enters a loop selecting the best keys to + * evict accordingly to the configured policy. + * + * If all the bytes needed to return back under the limit were freed the + * function returns REDIS_OK, otherwise REDIS_ERR is returned, and the caller + * should block the execution of commands that will result in more memory + * used by the server. + */ +int freeMemoryIfNeeded(void) { + size_t mem_used, mem_tofree, mem_freed; + int slaves = listLength(server.slaves); + + /* Remove the size of slaves output buffers and AOF buffer from the + * count of used memory. 
*/ + mem_used = zmalloc_used_memory(); + if (slaves) { + listIter li; + listNode *ln; + + listRewind(server.slaves,&li); + while((ln = listNext(&li))) { + redisClient *slave = listNodeValue(ln); + unsigned long obuf_bytes = getClientOutputBufferMemoryUsage(slave); + if (obuf_bytes > mem_used) + mem_used = 0; + else + mem_used -= obuf_bytes; + } + } + if (server.aof_state != REDIS_AOF_OFF) { + mem_used -= sdslen(server.aof_buf); + mem_used -= aofRewriteBufferSize(); + } + + /* Check if we are over the memory limit. */ + if (mem_used <= server.maxmemory) return REDIS_OK; + + if (server.maxmemory_policy == REDIS_MAXMEMORY_NO_EVICTION) + return REDIS_ERR; /* We need to free memory, but policy forbids. */ + + /* Compute how much memory we need to free. */ + mem_tofree = mem_used - server.maxmemory; + mem_freed = 0; + while (mem_freed < mem_tofree) { + int j, k, keys_freed = 0; + + for (j = 0; j < server.dbnum; j++) { + long bestval = 0; /* just to prevent warning */ + sds bestkey = NULL; + struct dictEntry *de; + redisDb *db = server.db+j; + dict *dict; + + if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_LRU || + server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_RANDOM) + { + dict = server.db[j].dict; + } else { + dict = server.db[j].expires; + } + if (dictSize(dict) == 0) continue; + + /* volatile-random and allkeys-random policy */ + if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_RANDOM || + server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_RANDOM) + { + de = dictGetRandomKey(dict); + bestkey = dictGetKey(de); + } + + /* volatile-lru and allkeys-lru policy */ + else if (server.maxmemory_policy == REDIS_MAXMEMORY_ALLKEYS_LRU || + server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_LRU) + { + for (k = 0; k < server.maxmemory_samples; k++) { + sds thiskey; + long thisval; + robj *o; + + de = dictGetRandomKey(dict); + thiskey = dictGetKey(de); + /* When policy is volatile-lru we need an additonal lookup + * to locate the real key, as dict is set to 
db->expires. */ + if (server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_LRU) + de = dictFind(db->dict, thiskey); + o = dictGetVal(de); + thisval = estimateObjectIdleTime(o); + + /* Higher idle time is better candidate for deletion */ + if (bestkey == NULL || thisval > bestval) { + bestkey = thiskey; + bestval = thisval; + } + } + } + + /* volatile-ttl */ + else if (server.maxmemory_policy == REDIS_MAXMEMORY_VOLATILE_TTL) { + for (k = 0; k < server.maxmemory_samples; k++) { + sds thiskey; + long thisval; + + de = dictGetRandomKey(dict); + thiskey = dictGetKey(de); + thisval = (long) dictGetVal(de); + + /* Expire sooner (minor expire unix timestamp) is better + * candidate for deletion */ + if (bestkey == NULL || thisval < bestval) { + bestkey = thiskey; + bestval = thisval; + } + } + } + + /* Finally remove the selected key. */ + if (bestkey) { + long long delta; + + robj *keyobj = createStringObject(bestkey,sdslen(bestkey)); + propagateExpire(db,keyobj); + /* We compute the amount of memory freed by dbDelete() alone. + * It is possible that actually the memory needed to propagate + * the DEL in AOF and replication link is greater than the one + * we are freeing removing the key, but we can't account for + * that otherwise we would never exit the loop. + * + * AOF and Output buffer memory will be freed eventually so + * we only care about memory used by the key space. */ + delta = (long long) zmalloc_used_memory(); + dbDelete(db,keyobj); + delta -= (long long) zmalloc_used_memory(); + mem_freed += delta; + server.stat_evictedkeys++; + decrRefCount(keyobj); + keys_freed++; + + /* When the memory to free starts to be big enough, we may + * start spending so much time here that is impossible to + * deliver data to the slaves fast enough, so we force the + * transmission here inside the loop. */ + if (slaves) flushSlavesOutputBuffers(); + } + } + if (!keys_freed) return REDIS_ERR; /* nothing to free... 
*/ + } + return REDIS_OK; +} + +/* =================================== Main! ================================ */ + +#ifdef __linux__ +int linuxOvercommitMemoryValue(void) { + FILE *fp = fopen("/proc/sys/vm/overcommit_memory","r"); + char buf[64]; + + if (!fp) return -1; + if (fgets(buf,64,fp) == NULL) { + fclose(fp); + return -1; + } + fclose(fp); + + return atoi(buf); +} + +void linuxOvercommitMemoryWarning(void) { + if (linuxOvercommitMemoryValue() == 0) { + redisLog(REDIS_WARNING,"WARNING overcommit_memory is set to 0! Background save may fail under low memory condition. To fix this issue add 'vm.overcommit_memory = 1' to /etc/sysctl.conf and then reboot or run the command 'sysctl vm.overcommit_memory=1' for this to take effect."); + } +} +#endif /* __linux__ */ + +void createPidFile(void) { + /* Try to write the pid file in a best-effort way. */ + FILE *fp = fopen(server.pidfile,"w"); + if (fp) { + fprintf(fp,"%d\n",(int)getpid()); + fclose(fp); + } +} + +void daemonize(void) { + int fd; + + if (fork() != 0) exit(0); /* parent exits */ + setsid(); /* create a new session */ + + /* Every output goes to /dev/null. If Redis is daemonized but + * the 'logfile' is set to 'stdout' in the configuration file + * it will not log at all. */ + if ((fd = open("/dev/null", O_RDWR, 0)) != -1) { + dup2(fd, STDIN_FILENO); + dup2(fd, STDOUT_FILENO); + dup2(fd, STDERR_FILENO); + if (fd > STDERR_FILENO) close(fd); + } +} + +void version() { + printf("Redis server v=%s sha=%s:%d malloc=%s bits=%d\n", + REDIS_VERSION, + redisGitSHA1(), + atoi(redisGitDirty()) > 0, + ZMALLOC_LIB, + sizeof(long) == 4 ? 
32 : 64); + exit(0); +} + +void usage() { + fprintf(stderr,"Usage: ./redis-server [/path/to/redis.conf] [options]\n"); + fprintf(stderr," ./redis-server - (read config from stdin)\n"); + fprintf(stderr," ./redis-server -v or --version\n"); + fprintf(stderr," ./redis-server -h or --help\n"); + fprintf(stderr," ./redis-server --test-memory \n\n"); + fprintf(stderr,"Examples:\n"); + fprintf(stderr," ./redis-server (run the server with default conf)\n"); + fprintf(stderr," ./redis-server /etc/redis/6379.conf\n"); + fprintf(stderr," ./redis-server --port 7777\n"); + fprintf(stderr," ./redis-server --port 7777 --slaveof 127.0.0.1 8888\n"); + fprintf(stderr," ./redis-server /etc/myredis.conf --loglevel verbose\n"); + exit(1); +} + +void redisAsciiArt(void) { +#include "asciilogo.h" + char *buf = zmalloc(1024*16); + + snprintf(buf,1024*16,ascii_logo, + REDIS_VERSION, + redisGitSHA1(), + strtol(redisGitDirty(),NULL,10) > 0, + (sizeof(long) == 8) ? "64" : "32", + server.cluster_enabled ? "cluster" : "stand alone", + server.port, + (long) getpid() + ); + redisLogRaw(REDIS_NOTICE|REDIS_LOG_RAW,buf); + zfree(buf); +} + +static void sigtermHandler(int sig) { + REDIS_NOTUSED(sig); + + redisLogFromHandler(REDIS_WARNING,"Received SIGTERM, scheduling shutdown..."); + server.shutdown_asap = 1; +} + +void setupSignalHandlers(void) { + struct sigaction act; + + /* When the SA_SIGINFO flag is set in sa_flags then sa_sigaction is used. + * Otherwise, sa_handler is used. 
*/ + sigemptyset(&act.sa_mask); + act.sa_flags = 0; + act.sa_handler = sigtermHandler; + sigaction(SIGTERM, &act, NULL); + +#ifdef HAVE_BACKTRACE + sigemptyset(&act.sa_mask); + act.sa_flags = SA_NODEFER | SA_RESETHAND | SA_SIGINFO; + act.sa_sigaction = sigsegvHandler; + sigaction(SIGSEGV, &act, NULL); + sigaction(SIGBUS, &act, NULL); + sigaction(SIGFPE, &act, NULL); + sigaction(SIGILL, &act, NULL); +#endif + return; +} + +void memtest(size_t megabytes, int passes); + +int main(int argc, char **argv) { + long long start; + struct timeval tv; + + /* We need to initialize our libraries, and the server configuration. */ + zmalloc_enable_thread_safeness(); + srand(time(NULL)^getpid()); + gettimeofday(&tv,NULL); + dictSetHashFunctionSeed(tv.tv_sec^tv.tv_usec^getpid()); + initServerConfig(); + + if (argc >= 2) { + int j = 1; /* First option to parse in argv[] */ + sds options = sdsempty(); + char *configfile = NULL; + + /* Handle special options --help and --version */ + if (strcmp(argv[1], "-v") == 0 || + strcmp(argv[1], "--version") == 0) version(); + if (strcmp(argv[1], "--help") == 0 || + strcmp(argv[1], "-h") == 0) usage(); + if (strcmp(argv[1], "--test-memory") == 0) { + if (argc == 3) { + memtest(atoi(argv[2]),50); + exit(0); + } else { + fprintf(stderr,"Please specify the amount of memory to test in megabytes.\n"); + fprintf(stderr,"Example: ./redis-server --test-memory 4096\n\n"); + exit(1); + } + } + + /* First argument is the config file name? */ + if (argv[j][0] != '-' || argv[j][1] != '-') + configfile = argv[j++]; + /* All the other options are parsed and conceptually appended to the + * configuration file. For instance --port 6380 will generate the + * string "port 6380\n" to be parsed after the actual file name + * is parsed, if any. 
*/ + while(j != argc) { + if (argv[j][0] == '-' && argv[j][1] == '-') { + /* Option name */ + if (sdslen(options)) options = sdscat(options,"\n"); + options = sdscat(options,argv[j]+2); + options = sdscat(options," "); + } else { + /* Option argument */ + options = sdscatrepr(options,argv[j],strlen(argv[j])); + options = sdscat(options," "); + } + j++; + } + resetServerSaveParams(); + loadServerConfig(configfile,options); + sdsfree(options); + } else { + redisLog(REDIS_WARNING,"Warning: no config file specified, using the default config. In order to specify a config file use 'redis-server /path/to/redis.conf'"); + } + if (server.daemonize) daemonize(); + initServer(); + if (server.daemonize) createPidFile(); + redisAsciiArt(); + redisLog(REDIS_WARNING,"Server started, Redis version " REDIS_VERSION); +#ifdef __linux__ + linuxOvercommitMemoryWarning(); +#endif + start = ustime(); + if (server.aof_state == REDIS_AOF_ON) { + if (loadAppendOnlyFile(server.aof_filename) == REDIS_OK) + redisLog(REDIS_NOTICE,"DB loaded from append only file: %.3f seconds",(float)(ustime()-start)/1000000); + } else { + if (rdbLoad(server.rdb_filename) == REDIS_OK) { + redisLog(REDIS_NOTICE,"DB loaded from disk: %.3f seconds", + (float)(ustime()-start)/1000000); + } else if (errno != ENOENT) { + redisLog(REDIS_WARNING,"Fatal error loading the DB. Exiting."); + exit(1); + } + } + if (server.ipfd > 0) + redisLog(REDIS_NOTICE,"The server is now ready to accept connections on port %d", server.port); + if (server.sofd > 0) + redisLog(REDIS_NOTICE,"The server is now ready to accept connections at %s", server.unixsocket); + aeSetBeforeSleepProc(server.el,beforeSleep); + aeMain(server.el); + aeDeleteEventLoop(server.el); + return 0; +} + +/* The End */ diff --git a/test/fixtures/c/yajl.c b/test/fixtures/c/yajl.c new file mode 100644 index 00000000..50bca443 --- /dev/null +++ b/test/fixtures/c/yajl.c @@ -0,0 +1,164 @@ +/* + * Copyright 2010, Lloyd Hilaiel. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. Neither the name of Lloyd Hilaiel nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "api/yajl_parse.h" +#include "yajl_lex.h" +#include "yajl_parser.h" +#include "yajl_alloc.h" + +#include +#include +#include + +const char * +yajl_status_to_string(yajl_status stat) +{ + const char * statStr = "unknown"; + switch (stat) { + case yajl_status_ok: + statStr = "ok, no error"; + break; + case yajl_status_client_canceled: + statStr = "client canceled parse"; + break; + case yajl_status_insufficient_data: + statStr = "eof was met before the parse could complete"; + break; + case yajl_status_error: + statStr = "parse error"; + break; + } + return statStr; +} + +yajl_handle +yajl_alloc(const yajl_callbacks * callbacks, + const yajl_parser_config * config, + const yajl_alloc_funcs * afs, + void * ctx) +{ + unsigned int allowComments = 0; + unsigned int validateUTF8 = 0; + yajl_handle hand = NULL; + yajl_alloc_funcs afsBuffer; + + /* first order of business is to set up memory allocation routines */ + if (afs != NULL) { + if (afs->malloc == NULL || afs->realloc == NULL || afs->free == NULL) + { + return NULL; + } + } else { + yajl_set_default_alloc_funcs(&afsBuffer); + afs = &afsBuffer; + } + + hand = (yajl_handle) YA_MALLOC(afs, sizeof(struct yajl_handle_t)); + + /* copy in pointers to allocation routines */ + memcpy((void *) &(hand->alloc), (void *) afs, sizeof(yajl_alloc_funcs)); + + if (config != NULL) { + allowComments = config->allowComments; + validateUTF8 = config->checkUTF8; + } + + hand->callbacks = callbacks; + hand->ctx = ctx; + hand->lexer = yajl_lex_alloc(&(hand->alloc), allowComments, validateUTF8); + hand->bytesConsumed = 0; + hand->decodeBuf = yajl_buf_alloc(&(hand->alloc)); + yajl_bs_init(hand->stateStack, &(hand->alloc)); + + yajl_bs_push(hand->stateStack, yajl_state_start); + + return hand; +} + +void +yajl_reset_parser(yajl_handle hand) { + hand->lexer = yajl_lex_realloc(hand->lexer); +} + +void +yajl_free(yajl_handle handle) +{ + yajl_bs_free(handle->stateStack); + yajl_buf_free(handle->decodeBuf); + 
yajl_lex_free(handle->lexer); + YA_FREE(&(handle->alloc), handle); +} + +yajl_status +yajl_parse(yajl_handle hand, const unsigned char * jsonText, + unsigned int jsonTextLen) +{ + yajl_status status; + status = yajl_do_parse(hand, jsonText, jsonTextLen); + return status; +} + +yajl_status +yajl_parse_complete(yajl_handle hand) +{ + /* The particular case we want to handle is a trailing number. + * Further input consisting of digits could cause our interpretation + * of the number to change (buffered "1" but "2" comes in). + * A very simple approach to this is to inject whitespace to terminate + * any number in the lex buffer. + */ + return yajl_parse(hand, (const unsigned char *)" ", 1); +} + +unsigned char * +yajl_get_error(yajl_handle hand, int verbose, + const unsigned char * jsonText, unsigned int jsonTextLen) +{ + return yajl_render_error_string(hand, jsonText, jsonTextLen, verbose); +} + +unsigned int +yajl_get_bytes_consumed(yajl_handle hand) +{ + if (!hand) return 0; + else return hand->bytesConsumed; +} + + +void +yajl_free_error(yajl_handle hand, unsigned char * str) +{ + /* use memory allocation functions if set */ + YA_FREE(&(hand->alloc), str); +} + +/* XXX: add utility routines to parse from file */ diff --git a/test/fixtures/coffee/browser.coffee b/test/fixtures/coffee/browser.coffee new file mode 100644 index 00000000..5ed0bea8 --- /dev/null +++ b/test/fixtures/coffee/browser.coffee @@ -0,0 +1,55 @@ +# Override exported methods for non-Node.js engines. + +CoffeeScript = require './coffee-script' +CoffeeScript.require = require + +# Use standard JavaScript `eval` to eval code. +CoffeeScript.eval = (code, options = {}) -> + options.bare ?= on + eval CoffeeScript.compile code, options + +# Running code does not provide access to this scope. +CoffeeScript.run = (code, options = {}) -> + options.bare = on + Function(CoffeeScript.compile code, options)() + +# If we're not in a browser environment, we're finished with the public API. +return unless window? 

# Fetch `url` asynchronously and run the response as CoffeeScript.
# `callback`, when given, is invoked once the request has completed and the
# response was accepted; a rejected status throws instead.
CoffeeScript.load = (url, callback) ->
  request = new (window.ActiveXObject or XMLHttpRequest)('Microsoft.XMLHTTP')
  request.open 'GET', url, true
  request.overrideMimeType 'text/plain' if 'overrideMimeType' of request
  request.onreadystatechange = ->
    # Only act once the request has reached readyState 4.
    return unless request.readyState is 4
    unless request.status in [0, 200]
      throw new Error "Could not load #{url}"
    CoffeeScript.run request.responseText
    callback() if callback
  request.send null

# Compile and evaluate every script tag whose type is `text/coffeescript`,
# one after the other. Remote scripts continue the chain from their load
# callback; inline scripts continue it immediately.
runScripts = ->
  scripts = document.getElementsByTagName 'script'
  coffees = (s for s in scripts when s.type is 'text/coffeescript')
  index = 0
  length = coffees.length
  do execute = ->
    script = coffees[index++]
    return unless script?.type is 'text/coffeescript'
    if script.src
      CoffeeScript.load script.src, execute
    else
      CoffeeScript.run script.innerHTML
      execute()
  null

# Hook `runScripts` to `DOMContentLoaded` where `addEventListener` exists,
# and to `onload` through `attachEvent` otherwise.
if window.addEventListener
  addEventListener 'DOMContentLoaded', runScripts, no
else
  attachEvent 'onload', runScripts
diff --git a/test/fixtures/coffee/coffee-script.coffee b/test/fixtures/coffee/coffee-script.coffee new file mode 100644 index 00000000..2933aebf --- /dev/null +++ b/test/fixtures/coffee/coffee-script.coffee @@ -0,0 +1,130 @@ +# CoffeeScript can be used both on the server, as a command-line compiler based +# on Node.js/V8, or to run CoffeeScripts directly in the browser. This module +# contains the main entry functions for tokenizing, parsing, and compiling +# source CoffeeScript into JavaScript. +# +# If included on a webpage, it will automatically sniff out, compile, and +# execute all scripts present in `text/coffeescript` tags.

fs               = require 'fs'
path             = require 'path'
{Lexer,RESERVED} = require './lexer'
{parser}         = require './parser'
vm               = require 'vm'

# TODO: Remove registerExtension when fully deprecated.
if require.extensions
  require.extensions['.coffee'] = (module, filename) ->
    content = compile fs.readFileSync(filename, 'utf8'), {filename}
    module._compile content, filename
else if require.registerExtension
  require.registerExtension '.coffee', (content) -> compile content

# The current CoffeeScript version number.
exports.VERSION = '1.3.3'

# Words that cannot be used as identifiers in CoffeeScript code
exports.RESERVED = RESERVED

# Expose helpers for testing.
exports.helpers = require './helpers'

# Compile a string of CoffeeScript code to JavaScript, using the Coffee/Jison
# compiler. Returns the JavaScript source, prefixed with a "Generated by"
# comment line when `options.header` is set. Errors are rethrown with the
# file name prepended to the message when `options.filename` is given.
exports.compile = compile = (code, options = {}) ->
  try
    js = (parser.parse lexer.tokenize code).compile options
    return js unless options.header
  catch err
    err.message = "In #{options.filename}, #{err.message}" if options.filename
    throw err
  header = "Generated by CoffeeScript #{@VERSION}"
  "// #{header}\n#{js}"

# Tokenize a string of CoffeeScript code, and return the array of tokens.
exports.tokens = (code, options) ->
  lexer.tokenize code, options

# Parse a string of CoffeeScript code or an array of lexed tokens, and
# return the AST. You can then compile it by calling `.compile()` on the root,
# or traverse it by using `.traverseChildren()` with a callback.
exports.nodes = (source, options) ->
  if typeof source is 'string'
    parser.parse lexer.tokenize source, options
  else
    parser.parse source

# Compile and execute a string of CoffeeScript (on the server), correctly
# setting `__filename`, `__dirname`, and relative `require()`.
exports.run = (code, options = {}) ->
  mainModule = require.main

  # Set the filename.
  mainModule.filename = process.argv[1] =
    if options.filename then fs.realpathSync(options.filename) else '.'

  # Clear the module cache.
  mainModule.moduleCache and= {}

  # Assign paths for node_modules loading. Without a filename, resolve
  # against the current directory; `fs.realpathSync undefined` would throw.
  dir = if options.filename
    path.dirname fs.realpathSync options.filename
  else
    fs.realpathSync '.'
  mainModule.paths = require('module')._nodeModulePaths dir

  # Compile.
  if path.extname(mainModule.filename) isnt '.coffee' or require.extensions
    mainModule._compile compile(code, options), mainModule.filename
  else
    mainModule._compile code, mainModule.filename

# Compile and evaluate a string of CoffeeScript (in a Node.js-like environment).
# The CoffeeScript REPL uses this to run the input.
exports.eval = (code, options = {}) ->
  return unless code = code.trim()
  Script = vm.Script
  if Script
    if options.sandbox?
      if options.sandbox instanceof Script.createContext().constructor
        sandbox = options.sandbox
      else
        # Copy a plain object's own properties into a fresh context.
        sandbox = Script.createContext()
        sandbox[k] = v for own k, v of options.sandbox
      sandbox.global = sandbox.root = sandbox.GLOBAL = sandbox
    else
      sandbox = global
    sandbox.__filename = options.filename || 'eval'
    sandbox.__dirname  = path.dirname sandbox.__filename
    # define module/require only if they chose not to specify their own
    unless sandbox isnt global or sandbox.module or sandbox.require
      Module = require 'module'
      sandbox.module  = _module  = new Module(options.modulename || 'eval')
      sandbox.require = _require = (path) -> Module._load path, _module, true
      _module.filename = sandbox.__filename
      _require[r] = require[r] for r in Object.getOwnPropertyNames require when r isnt 'paths'
      # use the same hack node currently uses for their own REPL
      _require.paths = _module.paths = Module._nodeModulePaths process.cwd()
      _require.resolve = (request) -> Module._resolveFilename request, _module
  # Work on a shallow copy so the caller's options object is not modified.
  o = {}
  o[k] = v for own k, v of options
  o.bare = on # ensure return value
  js = compile code, o
  if sandbox is global
    vm.runInThisContext js
  else
    vm.runInContext js, sandbox

# Instantiate a Lexer for our use here.
lexer = new Lexer

# The real Lexer produces a generic stream of tokens. This object provides a
# thin wrapper around it, compatible with the Jison API. We can then pass it
# directly as a "Jison lexer".
parser.lexer =
  lex: ->
    [tag, @yytext, @yylineno] = @tokens[@pos++] or ['']
    tag
  setInput: (@tokens) ->
    @pos = 0
  upcomingInput: ->
    ""

parser.yy = require './nodes'
diff --git a/test/fixtures/coffee/lexer.coffee b/test/fixtures/coffee/lexer.coffee new file mode 100644 index 00000000..56e8e324 --- /dev/null +++ b/test/fixtures/coffee/lexer.coffee @@ -0,0 +1,709 @@ +# The CoffeeScript Lexer. Uses a series of token-matching regexes to attempt +# matches against the beginning of the source code. When a match is found, +# a token is produced, we consume the match, and start again. Tokens are in the +# form: +# +# [tag, value, lineNumber] +# +# Which is a format that can be fed directly into [Jison](http://github.com/zaach/jison). + +{Rewriter, INVERSES} = require './rewriter' + +# Import the helpers we need. +{count, starts, compact, last} = require './helpers' + +# The Lexer Class +# --------------- + +# The Lexer class reads a stream of CoffeeScript and divvies it up into tagged +# tokens. Some potential ambiguity in the grammar has been avoided by +# pushing some extra smarts into the Lexer. +exports.Lexer = class Lexer + + # **tokenize** is the Lexer's main method. Scan by attempting to match tokens + # one at a time, using a regular expression anchored at the start of the + # remaining code, or a custom recursive token-matching method + # (for interpolations). When the next token has been recorded, we move forward + # within the code past the token, and begin again. + # + # Each tokenizing method is responsible for returning the number of characters + # it has consumed. + # + # Before returning the token stream, run it through the [Rewriter](rewriter.html) + # unless explicitly asked not to.
+ tokenize: (code, opts = {}) -> + code = "\n#{code}" if WHITESPACE.test code + code = code.replace(/\r/g, '').replace TRAILING_SPACES, '' + + @code = code # The remainder of the source code. + @line = opts.line or 0 # The current line. + @indent = 0 # The current indentation level. + @indebt = 0 # The over-indentation at the current level. + @outdebt = 0 # The under-outdentation at the current level. + @indents = [] # The stack of all current indentation levels. + @ends = [] # The stack for pairing up tokens. + @tokens = [] # Stream of parsed tokens in the form `['TYPE', value, line]`. + + # At every position, run through this list of attempted matches, + # short-circuiting if any of them succeed. Their order determines precedence: + # `@literalToken` is the fallback catch-all. + i = 0 + while @chunk = code[i..] + i += @identifierToken() or + @commentToken() or + @whitespaceToken() or + @lineToken() or + @heredocToken() or + @stringToken() or + @numberToken() or + @regexToken() or + @jsToken() or + @literalToken() + + @closeIndentation() + @error "missing #{tag}" if tag = @ends.pop() + return @tokens if opts.rewrite is off + (new Rewriter).rewrite @tokens + + # Tokenizers + # ---------- + + # Matches identifying literals: variables, keywords, method names, etc. + # Check to ensure that JavaScript reserved words aren't being used as + # identifiers. Because CoffeeScript reserves a handful of keywords that are + # allowed in JavaScript, we're careful not to tag them as keywords when + # referenced as property names here, so you can still do `jQuery.is()` even + # though `is` means `===` otherwise. 
+ identifierToken: -> + return 0 unless match = IDENTIFIER.exec @chunk + [input, id, colon] = match + + if id is 'own' and @tag() is 'FOR' + @token 'OWN', id + return id.length + forcedIdentifier = colon or + (prev = last @tokens) and (prev[0] in ['.', '?.', '::'] or + not prev.spaced and prev[0] is '@') + tag = 'IDENTIFIER' + + if not forcedIdentifier and (id in JS_KEYWORDS or id in COFFEE_KEYWORDS) + tag = id.toUpperCase() + if tag is 'WHEN' and @tag() in LINE_BREAK + tag = 'LEADING_WHEN' + else if tag is 'FOR' + @seenFor = yes + else if tag is 'UNLESS' + tag = 'IF' + else if tag in UNARY + tag = 'UNARY' + else if tag in RELATION + if tag isnt 'INSTANCEOF' and @seenFor + tag = 'FOR' + tag + @seenFor = no + else + tag = 'RELATION' + if @value() is '!' + @tokens.pop() + id = '!' + id + + if id in JS_FORBIDDEN + if forcedIdentifier + tag = 'IDENTIFIER' + id = new String id + id.reserved = yes + else if id in RESERVED + @error "reserved word \"#{id}\"" + + unless forcedIdentifier + id = COFFEE_ALIAS_MAP[id] if id in COFFEE_ALIASES + tag = switch id + when '!' then 'UNARY' + when '==', '!=' then 'COMPARE' + when '&&', '||' then 'LOGIC' + when 'true', 'false' then 'BOOL' + when 'break', 'continue' then 'STATEMENT' + else tag + + @token tag, id + @token ':', ':' if colon + input.length + + # Matches numbers, including decimals, hex, and exponential notation. + # Be careful not to interfere with ranges-in-progress. 
+ numberToken: -> + return 0 unless match = NUMBER.exec @chunk + number = match[0] + if /^0[BOX]/.test number + @error "radix prefix '#{number}' must be lowercase" + else if /E/.test(number) and not /^0x/.test number + @error "exponential notation '#{number}' must be indicated with a lowercase 'e'" + else if /^0\d*[89]/.test number + @error "decimal literal '#{number}' must not be prefixed with '0'" + else if /^0\d+/.test number + @error "octal literal '#{number}' must be prefixed with '0o'" + lexedLength = number.length + if octalLiteral = /^0o([0-7]+)/.exec number + number = '0x' + (parseInt octalLiteral[1], 8).toString 16 + if binaryLiteral = /^0b([01]+)/.exec number + number = '0x' + (parseInt binaryLiteral[1], 2).toString 16 + @token 'NUMBER', number + lexedLength + + # Matches strings, including multi-line strings. Ensures that quotation marks + # are balanced within the string's contents, and within nested interpolations. + stringToken: -> + switch @chunk.charAt 0 + when "'" + return 0 unless match = SIMPLESTR.exec @chunk + @token 'STRING', (string = match[0]).replace MULTILINER, '\\\n' + when '"' + return 0 unless string = @balancedString @chunk, '"' + if 0 < string.indexOf '#{', 1 + @interpolateString string[1...-1] + else + @token 'STRING', @escapeLines string + else + return 0 + if octalEsc = /^(?:\\.|[^\\])*\\(?:0[0-7]|[1-7])/.test string + @error "octal escape sequences #{string} are not allowed" + @line += count string, '\n' + string.length + + # Matches heredocs, adjusting indentation to the correct level, as heredocs + # preserve whitespace, but ignore indentation to the left. 
+ heredocToken: -> + return 0 unless match = HEREDOC.exec @chunk + heredoc = match[0] + quote = heredoc.charAt 0 + doc = @sanitizeHeredoc match[2], quote: quote, indent: null + if quote is '"' and 0 <= doc.indexOf '#{' + @interpolateString doc, heredoc: yes + else + @token 'STRING', @makeString doc, quote, yes + @line += count heredoc, '\n' + heredoc.length + + # Matches and consumes comments. + commentToken: -> + return 0 unless match = @chunk.match COMMENT + [comment, here] = match + if here + @token 'HERECOMMENT', @sanitizeHeredoc here, + herecomment: true, indent: Array(@indent + 1).join(' ') + @line += count comment, '\n' + comment.length + + # Matches JavaScript interpolated directly into the source via backticks. + jsToken: -> + return 0 unless @chunk.charAt(0) is '`' and match = JSTOKEN.exec @chunk + @token 'JS', (script = match[0])[1...-1] + script.length + + # Matches regular expression literals. Lexing regular expressions is difficult + # to distinguish from division, so we borrow some basic heuristics from + # JavaScript and Ruby. + regexToken: -> + return 0 if @chunk.charAt(0) isnt '/' + if match = HEREGEX.exec @chunk + length = @heregexToken match + @line += count match[0], '\n' + return length + + prev = last @tokens + return 0 if prev and (prev[0] in (if prev.spaced then NOT_REGEX else NOT_SPACED_REGEX)) + return 0 unless match = REGEX.exec @chunk + [match, regex, flags] = match + if regex[..1] is '/*' then @error 'regular expressions cannot begin with `*`' + if regex is '//' then regex = '/(?:)/' + @token 'REGEX', "#{regex}#{flags}" + match.length + + # Matches multiline extended regular expressions. 
+ heregexToken: (match) -> + [heregex, body, flags] = match + if 0 > body.indexOf '#{' + re = body.replace(HEREGEX_OMIT, '').replace(/\//g, '\\/') + if re.match /^\*/ then @error 'regular expressions cannot begin with `*`' + @token 'REGEX', "/#{ re or '(?:)' }/#{flags}" + return heregex.length + @token 'IDENTIFIER', 'RegExp' + @tokens.push ['CALL_START', '('] + tokens = [] + for [tag, value] in @interpolateString(body, regex: yes) + if tag is 'TOKENS' + tokens.push value... + else + continue unless value = value.replace HEREGEX_OMIT, '' + value = value.replace /\\/g, '\\\\' + tokens.push ['STRING', @makeString(value, '"', yes)] + tokens.push ['+', '+'] + tokens.pop() + @tokens.push ['STRING', '""'], ['+', '+'] unless tokens[0]?[0] is 'STRING' + @tokens.push tokens... + @tokens.push [',', ','], ['STRING', '"' + flags + '"'] if flags + @token ')', ')' + heregex.length + + # Matches newlines, indents, and outdents, and determines which is which. + # If we can detect that the current line is continued onto the the next line, + # then the newline is suppressed: + # + # elements + # .each( ... ) + # .map( ... ) + # + # Keeps track of the level of indentation, because a single outdent token + # can close multiple indents, so we need to know how far in we happen to be. 
+  # Returns the number of characters consumed (0 when the chunk does not
+  # start with a newline run).
+  lineToken: ->
+    return 0 unless match = MULTI_DENT.exec @chunk
+    indent = match[0]
+    @line += count indent, '\n'
+    @seenFor = no
+    # Width of the whitespace that follows the last newline in the match.
+    size = indent.length - 1 - indent.lastIndexOf '\n'
+    noNewlines = @unfinished()
+    # Same indentation level as before: just a newline (or a continuation).
+    if size - @indebt is @indent
+      if noNewlines then @suppressNewlines() else @newlineToken()
+      return indent.length
+    if size > @indent
+      # Deeper indentation on a continued line is remembered in `@indebt`
+      # instead of opening a block.
+      if noNewlines
+        @indebt = size - @indent
+        @suppressNewlines()
+        return indent.length
+      diff = size - @indent + @outdebt
+      @token 'INDENT', diff
+      @indents.push diff
+      @ends.push 'OUTDENT'
+      @outdebt = @indebt = 0
+    else
+      @indebt = 0
+      @outdentToken @indent - size, noNewlines
+    @indent = size
+    indent.length
+
+  # Record an outdent token or multiple tokens, if we happen to be moving back
+  # inwards past several recorded indents. `moveOut` is the amount to move
+  # out by; `noNewlines` suppresses the trailing TERMINATOR. Returns `this`.
+  outdentToken: (moveOut, noNewlines) ->
+    while moveOut > 0
+      len = @indents.length - 1
+      if @indents[len] is undefined
+        # No recorded indents left; stop.
+        moveOut = 0
+      else if @indents[len] is @outdebt
+        moveOut -= @outdebt
+        @outdebt = 0
+      else if @indents[len] < @outdebt
+        @outdebt -= @indents[len]
+        moveOut -= @indents[len]
+      else
+        dent = @indents.pop() - @outdebt
+        moveOut -= dent
+        @outdebt = 0
+        @pair 'OUTDENT'
+        @token 'OUTDENT', dent
+    @outdebt -= moveOut if dent
+    # Drop trailing `;` tokens before terminating the line.
+    @tokens.pop() while @value() is ';'
+    @token 'TERMINATOR', '\n' unless @tag() is 'TERMINATOR' or noNewlines
+    this
+
+  # Matches and consumes non-meaningful whitespace. Tag the previous token
+  # as being "spaced", because there are some cases where it makes a difference.
+  # When the chunk starts with a newline instead, the previous token is tagged
+  # `newLine` and nothing is consumed.
+  whitespaceToken: ->
+    return 0 unless (match = WHITESPACE.exec @chunk) or
+                    (nline = @chunk.charAt(0) is '\n')
+    prev = last @tokens
+    prev[if match then 'spaced' else 'newLine'] = true if prev
+    if match then match[0].length else 0
+
+  # Generate a newline token. Consecutive newlines get merged together.
+  # Trailing `;` tokens are removed first. Returns `this`.
+  newlineToken: ->
+    @tokens.pop() while @value() is ';'
+    @token 'TERMINATOR', '\n' unless @tag() is 'TERMINATOR'
+    this
+
+  # Use a `\` at a line-ending to suppress the newline.
+  # The slash is removed here once its job is done. Returns `this`.
+  suppressNewlines: ->
+    @tokens.pop() if @value() is '\\'
+    this
+
+  # We treat all other single characters as a token. E.g.: `( ) , . !`
+  # Multi-character operators are also literal tokens, so that Jison can assign
+  # the proper order of operations. There are some symbols that we tag specially
+  # here. `;` and newlines are both treated as a `TERMINATOR`, we distinguish
+  # parentheses that indicate a method call from regular parentheses, and so on.
+  # Returns the length of the consumed operator or character.
+  literalToken: ->
+    if match = OPERATOR.exec @chunk
+      [value] = match
+      # A function arrow: retag the parameter list that precedes it.
+      @tagParameters() if CODE.test value
+    else
+      value = @chunk.charAt 0
+    tag = value
+    prev = last @tokens
+    if value is '=' and prev
+      if not prev[1].reserved and prev[1] in JS_FORBIDDEN
+        @error "reserved word \"#{@value()}\" can't be assigned"
+      # `||` or `&&` followed by `=` is folded into the previous token as a
+      # compound assignment; no new token is emitted.
+      if prev[1] in ['||', '&&']
+        prev[0] = 'COMPOUND_ASSIGN'
+        prev[1] += '='
+        return value.length
+    if value is ';'
+      @seenFor = no
+      tag = 'TERMINATOR'
+    else if value in MATH then tag = 'MATH'
+    else if value in COMPARE then tag = 'COMPARE'
+    else if value in COMPOUND_ASSIGN then tag = 'COMPOUND_ASSIGN'
+    else if value in UNARY then tag = 'UNARY'
+    else if value in SHIFT then tag = 'SHIFT'
+    else if value in LOGIC or value is '?' and prev?.spaced then tag = 'LOGIC'
+    else if prev and not prev.spaced
+      # An unspaced `(` or `[` after a callable/indexable token starts a call
+      # or an index; a preceding `?` is retagged as the soaked variant.
+      if value is '(' and prev[0] in CALLABLE
+        prev[0] = 'FUNC_EXIST' if prev[0] is '?'
+        tag = 'CALL_START'
+      else if value is '[' and prev[0] in INDEXABLE
+        tag = 'INDEX_START'
+        switch prev[0]
+          when '?' then prev[0] = 'INDEX_SOAK'
+    # Track bracket balance: openers push their closer, closers are paired.
+    switch value
+      when '(', '{', '[' then @ends.push INVERSES[value]
+      when ')', '}', ']' then @pair value
+    @token tag, value
+    value.length
+
+  # Token Manipulators
+  # ------------------
+
+  # Sanitize a heredoc or herecomment by
+  # erasing all external indentation on the left-hand side.
+  # `options.indent` is the indentation to strip (or null to detect it) and
+  # `options.herecomment` marks block comments. Returns the cleaned text.
+  sanitizeHeredoc: (doc, options) ->
+    {indent, herecomment} = options
+    if herecomment
+      if HEREDOC_ILLEGAL.test doc
+        @error "block comment cannot contain \"*/\", starting"
+      # Nothing to strip when there is no newline past the first character.
+      return doc if doc.indexOf('\n') <= 0
+    else
+      # Pick the shortest non-empty indentation found after any newline run
+      # (or the first one seen while `indent` is still null).
+      while match = HEREDOC_INDENT.exec doc
+        attempt = match[1]
+        indent = attempt if indent is null or 0 < attempt.length < indent.length
+    doc = doc.replace /// \n #{indent} ///g, '\n' if indent
+    # Heredocs (but not herecomments) lose their leading newline.
+    doc = doc.replace /^\n/, '' unless herecomment
+    doc
+
+  # A source of ambiguity in our grammar used to be parameter lists in function
+  # definitions versus argument lists in function calls. Walk backwards, tagging
+  # parameters specially in order to make things easier for the parser.
+  # Returns `this`.
+  tagParameters: ->
+    return this if @tag() isnt ')'
+    stack = []
+    {tokens} = this
+    i = tokens.length
+    tokens[--i][0] = 'PARAM_END'
+    while tok = tokens[--i]
+      switch tok[0]
+        when ')'
+          # A nested closing paren: remember it until its opener is seen.
+          stack.push tok
+        when '(', 'CALL_START'
+          if stack.length then stack.pop()
+          else if tok[0] is '('
+            # The opener matching the PARAM_END tagged above.
+            tok[0] = 'PARAM_START'
+            return this
+          else return this
+    this
+
+  # Close up all remaining open blocks at the end of the file.
+  closeIndentation: ->
+    @outdentToken @indent
+
+  # Matches a balanced group such as a single or double-quoted string. Pass in
+  # a series of delimiters, all of which must be nested correctly within the
+  # contents of the string. This method allows us to have strings within
+  # interpolations within strings, ad infinitum.
+  # Scans `str` from index 1 and returns the prefix up to and including the
+  # character that closes the outermost `end` delimiter; raises a syntax error
+  # when the delimiters never balance.
+  balancedString: (str, end) ->
+    # Number of upcoming characters to skip without inspection.
+    continueCount = 0
+    # Closing delimiters still expected, innermost last.
+    stack = [end]
+    for i in [1...str.length]
+      if continueCount
+        --continueCount
+        continue
+      switch letter = str.charAt i
+        when '\\'
+          # Skip the escaped character.
+          ++continueCount
+          continue
+        when end
+          stack.pop()
+          unless stack.length
+            return str[0..i]
+          end = stack[stack.length - 1]
+          continue
+      # Inside an interpolation (`}` expected): quotes open a nested string,
+      # a regex/heregex is skipped whole, and `{` opens a nested brace.
+      if end is '}' and letter in ['"', "'"]
+        stack.push end = letter
+      else if end is '}' and letter is '/' and match = (HEREGEX.exec(str[i..]) or REGEX.exec(str[i..]))
+        continueCount += match[0].length - 1
+      else if end is '}' and letter is '{'
+        stack.push end = '}'
+      # Inside a double-quoted string, `#{` opens an interpolation.
+      else if end is '"' and prev is '#' and letter is '{'
+        stack.push end = '}'
+      prev = letter
+    @error "missing #{ stack.pop() }, starting"
+
+  # Expand variables and expressions inside double-quoted strings using
+  # Ruby-like notation for substitution of arbitrary expressions.
+  #
+  #     "Hello #{name.capitalize()}."
+  #
+  # If it encounters an interpolation, this method will recursively create a
+  # new Lexer, tokenize the interpolated contents, and merge them into the
+  # token stream.
+  # `options.heredoc` is forwarded to `makeString`; with `options.regex` the
+  # collected pieces are returned without emitting any tokens.
+  interpolateString: (str, options = {}) ->
+    {heredoc, regex} = options
+    tokens = []
+    # `pi` is the index where the pending literal chunk starts.
+    pi = 0
+    i = -1
+    while letter = str.charAt i += 1
+      if letter is '\\'
+        # Skip the escaped character.
+        i += 1
+        continue
+      unless letter is '#' and str.charAt(i+1) is '{' and
+             (expr = @balancedString str[i + 1..], '}')
+        continue
+      # Flush the literal text that precedes this interpolation.
+      tokens.push ['NEOSTRING', str[pi...i]] if pi < i
+      inner = expr[1...-1]
+      if inner.length
+        nested = new Lexer().tokenize inner, line: @line, rewrite: off
+        # Drop the final token of the nested stream and any leading TERMINATOR.
+        nested.pop()
+        nested.shift() if nested[0]?[0] is 'TERMINATOR'
+        if len = nested.length
+          # Multi-token expressions are wrapped in parentheses.
+          if len > 1
+            nested.unshift ['(', '(', @line]
+            nested.push [')', ')', @line]
+          tokens.push ['TOKENS', nested]
+      i += expr.length
+      pi = i + 1
+    tokens.push ['NEOSTRING', str[pi..]] if i > pi < str.length
+    return tokens if regex
+    return @token 'STRING', '""' unless tokens.length
+    # Make sure the concatenation starts with a string.
+    tokens.unshift ['', ''] unless tokens[0][0] is 'NEOSTRING'
+    @token '(', '(' if interpolated = tokens.length > 1
+    for [tag, value], i in tokens
+      @token '+', '+' if i
+      if tag is 'TOKENS'
+        @tokens.push value...
+      else
+        @token 'STRING', @makeString value, '"', heredoc
+    @token ')', ')' if interpolated
+    tokens
+
+  # Pairs up a closing token, ensuring that all listed pairs of tokens are
+  # correctly balanced throughout the course of the token stream.
+  pair: (tag) ->
+    unless tag is wanted = last @ends
+      @error "unmatched #{tag}" unless 'OUTDENT' is wanted
+      # Auto-close INDENT to support syntax like this:
+      #
+      #     el.click((event) ->
+      #       el.hide())
+      #
+      @indent -= size = last @indents
+      @outdentToken size, true
+      return @pair tag
+    @ends.pop()
+
+  # Helpers
+  # -------
+
+  # Add a token to the results, taking note of the line number.
+  token: (tag, value) ->
+    @tokens.push [tag, value, @line]
+
+  # Peek at a tag in the current token stream. `index` is passed through to
+  # `last`; a truthy `tag` overwrites the token's tag instead of reading it.
+  tag: (index, tag) ->
+    (tok = last @tokens, index) and if tag then tok[0] = tag else tok[0]
+
+  # Peek at a value in the current token stream.
+  # `index` is passed through to `last`; a truthy `val` overwrites the
+  # token's value instead of reading it.
+  value: (index, val) ->
+    (tok = last @tokens, index) and if val then tok[1] = val else tok[1]
+
+  # Are we in the midst of an unfinished expression? True when the upcoming
+  # chunk matches LINE_CONTINUER or the previous tag is one of those listed.
+  unfinished: ->
+    LINE_CONTINUER.test(@chunk) or
+    @tag() in ['\\', '.', '?.', 'UNARY', 'MATH', '+', '-', 'SHIFT', 'RELATION'
+               'COMPARE', 'LOGIC', 'THROW', 'EXTENDS']
+
+  # Converts newlines for string literals: an escaped `\n` in heredocs,
+  # removed otherwise.
+  escapeLines: (str, heredoc) ->
+    str.replace MULTILINER, if heredoc then '\\n' else ''
+
+  # Constructs a string token by escaping quotes and newlines.
+  makeString: (body, quote, heredoc) ->
+    return quote + quote unless body
+    # Drop the backslash in front of a newline or the quote character;
+    # every other escape sequence is kept as written.
+    body = body.replace /\\([\s\S])/g, (match, contents) ->
+      if contents in ['\n', quote] then contents else match
+    # Then escape every occurrence of the quote character.
+    body = body.replace /// #{quote} ///g, '\\$&'
+    quote + @escapeLines(body, heredoc) + quote
+
+  # Throws a syntax error on the current `@line` (reported 1-based).
+  error: (message) ->
+    throw SyntaxError "#{message} on line #{ @line + 1}"
+
+# Constants
+# ---------
+
+# Keywords that CoffeeScript shares in common with JavaScript.
+JS_KEYWORDS = [
+  'true', 'false', 'null', 'this'
+  'new', 'delete', 'typeof', 'in', 'instanceof'
+  'return', 'throw', 'break', 'continue', 'debugger'
+  'if', 'else', 'switch', 'for', 'while', 'do', 'try', 'catch', 'finally'
+  'class', 'extends', 'super'
+]
+
+# CoffeeScript-only keywords.
+COFFEE_KEYWORDS = ['undefined', 'then', 'unless', 'until', 'loop', 'of', 'by', 'when']
+
+# CoffeeScript word aliases and the operator or literal each one stands for.
+COFFEE_ALIAS_MAP =
+  and : '&&'
+  or : '||'
+  is : '=='
+  isnt : '!='
+  not : '!'
+  yes : 'true'
+  no : 'false'
+  on : 'true'
+  off : 'false'
+
+# The alias words themselves also count as CoffeeScript keywords.
+COFFEE_ALIASES = (key for key of COFFEE_ALIAS_MAP)
+COFFEE_KEYWORDS = COFFEE_KEYWORDS.concat COFFEE_ALIASES
+
+# The list of keywords that are reserved by JavaScript, but not used, or are
+# used by CoffeeScript internally. We throw an error when these are encountered,
+# to avoid having a JavaScript error at runtime.
+RESERVED = [
+  'case', 'default', 'function', 'var', 'void', 'with'
+  'const', 'let', 'enum', 'export', 'import', 'native'
+  '__hasProp', '__extends', '__slice', '__bind', '__indexOf'
+  'implements', 'interface', 'package',
+  'private', 'protected', 'public', 'static', 'yield'
+]
+
+# Identifiers that strict-mode JavaScript does not allow to be rebound.
+STRICT_PROSCRIBED = ['arguments', 'eval']
+
+# The superset of both JavaScript keywords and reserved words, none of which may
+# be used as identifiers or properties.
+JS_FORBIDDEN = JS_KEYWORDS.concat(RESERVED).concat(STRICT_PROSCRIBED)
+
+# Exported so other modules can consult the complete reserved-word list.
+exports.RESERVED = RESERVED.concat(JS_KEYWORDS).concat(COFFEE_KEYWORDS).concat(STRICT_PROSCRIBED)
+exports.STRICT_PROSCRIBED = STRICT_PROSCRIBED
+
+# Token matching regexes.
+IDENTIFIER = /// ^
+  ( [$A-Za-z_\x7f-\uffff][$\w\x7f-\uffff]* )
+  ( [^\n\S]* : (?!:) )?  # Is this a property name?
+///
+
+NUMBER = ///
+  ^ 0b[01]+ |                 # binary
+  ^ 0o[0-7]+ |                # octal
+  ^ 0x[\da-f]+ |              # hex
+  ^ \d*\.?\d+ (?:e[+-]?\d+)?  # decimal
+///i
+
+HEREDOC = /// ^ ("""|''') ([\s\S]*?) (?:\n[^\n\S]*)? \1 ///
+
+OPERATOR = /// ^ (
+  ?: [-=]>             # function
+   | [-+*/%<>&|^!?=]=  # compound assign / compare
+   | >>>=?             # zero-fill right shift
+   | ([-+:])\1         # doubles
+   | ([&|<>])\2=?      # logic / shift
+   | \?\.              # soak access
+   | \.{2,3}           # range or splat
+) ///
+
+WHITESPACE = /^[^\n\S]+/
+
+COMMENT = /^###([^#][\s\S]*?)(?:###[^\n\S]*|(?:###)?$)|^(?:\s*#(?!##[^#]).*)+/
+
+CODE = /^[-=]>/
+
+MULTI_DENT = /^(?:\n[^\n\S]*)+/
+
+SIMPLESTR = /^'[^\\']*(?:\\.[^\\']*)*'/
+
+JSTOKEN = /^`[^\\`]*(?:\\.[^\\`]*)*`/
+
+# Regex-matching-regexes.
+REGEX = /// ^
+  (/ (?! [\s=] )   # disallow leading whitespace or equals signs
+  [^ [ / \n \\ ]*  # every other thing
+  (?:
+    (?: \\[\s\S]   # anything escaped
+      | \[         # character class
+           [^ \] \n \\ ]*
+           (?: \\[\s\S] [^ \] \n \\ ]* )*
+         ]
+    ) [^ [ / \n \\ ]*
+  )*
+  /) ([imgy]{0,4}) (?!\w)
+///
+
+HEREGEX = /// ^ /{3} ([\s\S]+?) /{3} ([imgy]{0,4}) (?!\w) ///
+
+# Whitespace runs, together with a trailing `#` comment, removed from heregex bodies.
+HEREGEX_OMIT = /\s+(?:#.*)?/g
+
+# Token cleaning regexes.
+# Every newline; `escapeLines` replaces these inside string literals.
+MULTILINER = /\n/g
+
+# A run of newlines followed by (captured) horizontal whitespace.
+HEREDOC_INDENT = /\n+([^\n\S]*)/g
+
+# `*/`, which `sanitizeHeredoc` rejects inside block comments.
+HEREDOC_ILLEGAL = /\*\//
+
+# A chunk starting (after optional whitespace) with `,`, with `.` or `?.` not
+# followed by another `.` or a digit, or with `::`.
+LINE_CONTINUER = /// ^ \s* (?: , | \??\.(?![.\d]) | :: ) ///
+
+TRAILING_SPACES = /\s+$/
+
+# Compound assignment tokens.
+COMPOUND_ASSIGN = [
+  '-=', '+=', '/=', '*=', '%=', '||=', '&&=', '?=', '<<=', '>>=', '>>>=', '&=', '^=', '|='
+]
+
+# Unary tokens.
+UNARY = ['!', '~', 'NEW', 'TYPEOF', 'DELETE', 'DO']
+
+# Logical tokens.
+LOGIC = ['&&', '||', '&', '|', '^']
+
+# Bit-shifting tokens.
+SHIFT = ['<<', '>>', '>>>']
+
+# Comparison tokens.
+COMPARE = ['==', '!=', '<', '>', '<=', '>=']
+
+# Mathematical tokens.
+MATH = ['*', '/', '%']
+
+# Relational tokens that are negatable with `not` prefix.
+RELATION = ['IN', 'OF', 'INSTANCEOF']
+
+# Boolean tokens.
+BOOL = ['TRUE', 'FALSE']
+
+# Tokens which a regular expression will never immediately follow, but which
+# a division operator might.
+#
+# See: http://www.mozilla.org/js/language/js20-2002-04/rationale/syntax.html#regular-expressions
+#
+# Our list is shorter, due to sans-parentheses method calls.
+NOT_REGEX = ['NUMBER', 'REGEX', 'BOOL', 'NULL', 'UNDEFINED', '++', '--', ']']
+
+# If the previous token is not spaced, there are more preceding tokens that
+# force a division parse:
+NOT_SPACED_REGEX = NOT_REGEX.concat ')', '}', 'THIS', 'IDENTIFIER', 'STRING'
+
+# Tokens which could legitimately be invoked or indexed. An opening
+# parentheses or bracket following these tokens will be recorded as the start
+# of a function invocation or indexing operation.
+CALLABLE = ['IDENTIFIER', 'STRING', 'REGEX', ')', ']', '}', '?', '::', '@', 'THIS', 'SUPER']
+INDEXABLE = CALLABLE.concat 'NUMBER', 'BOOL', 'NULL', 'UNDEFINED'
+
+# Tokens that, when immediately preceding a `WHEN`, indicate that the `WHEN`
+# occurs at the start of a line. We disambiguate these from trailing whens to
+# avoid an ambiguity in the grammar.
+LINE_BREAK = ['INDENT', 'OUTDENT', 'TERMINATOR']
diff --git a/test/fixtures/coffee/rack_application.coffee b/test/fixtures/coffee/rack_application.coffee
new file mode 100644
index 00000000..674313b5
--- /dev/null
+++ b/test/fixtures/coffee/rack_application.coffee
@@ -0,0 +1,256 @@
+# The `RackApplication` class is responsible for managing a
+# [Nack](http://josh.github.com/nack/) pool for a given Rack
+# application. Incoming HTTP requests are dispatched to
+# `RackApplication` instances by an `HttpServer`, where they are
+# subsequently handled by a pool of Nack worker processes. By default,
+# Pow tells Nack to use a maximum of two worker processes per
+# application, but this can be overridden with the configuration's
+# `workers` option.
+#
+# Before creating the Nack pool, Pow executes the `.powrc`, `.envrc`,
+# and `.powenv` scripts if they're present in the application root,
+# captures their environment variables, and passes them along to the
+# Nack worker processes. This lets you modify your `RUBYOPT` to use
+# different Ruby options, for example.
+#
+# If [rvm](http://rvm.beginrescueend.com/) is installed and an
+# `.rvmrc` file is present in the application's root, Pow will load
+# both before creating the Nack pool. This makes it easy to run an
+# app with a specific version of Ruby.
+#
+# Nack workers remain running until they're killed, restarted (by
+# touching the `tmp/restart.txt` file in the application root), or
+# until the application has not served requests for the length of time
+# specified in the configuration's `timeout` option (15 minutes by
+# default).
+
+async = require "async"
+fs = require "fs"
+nack = require "nack"
+
+# Project-local helpers and the path utilities used throughout the class.
+{bufferLines, pause, sourceScriptEnv} = require "./util"
+{join, exists, basename, resolve} = require "path"
+
+module.exports = class RackApplication
+  # Create a `RackApplication` for the given configuration and
+  # root path. The application begins life in the uninitialized
+  # state.
+  # The logger is named after the root directory's basename; the three
+  # callback queues start out empty.
+  constructor: (@configuration, @root, @firstHost) ->
+    @logger = @configuration.getLogger join "apps", basename @root
+    @readyCallbacks = []
+    @quitCallbacks = []
+    @statCallbacks = []
+
+  # Queue `callback` to be invoked when the application becomes ready,
+  # then start the initialization process. If the application's state
+  # is ready, the callback is invoked immediately.
+  ready: (callback) ->
+    if @state is "ready"
+      callback()
+    else
+      @readyCallbacks.push callback
+      @initialize()
+
+  # Tell the application to quit and queue `callback` to be invoked
+  # when all workers have exited. If the application has already quit,
+  # the callback is invoked immediately.
+  quit: (callback) ->
+    if @state
+      @quitCallbacks.push callback if callback
+      @terminate()
+    else
+      callback?()
+
+  # Stat `tmp/restart.txt` in the application root and invoke the
+  # given callback with a single argument indicating whether or not
+  # the file has been touched since the last call to
+  # `queryRestartFile`.
+  queryRestartFile: (callback) ->
+    fs.stat join(@root, "tmp/restart.txt"), (err, stats) =>
+      if err
+        # A failed stat clears the cached mtime and reports "unchanged".
+        @mtime = null
+        callback false
+      else
+        lastMtime = @mtime
+        @mtime = stats.mtime.getTime()
+        callback lastMtime isnt @mtime
+
+  # Check to see if `tmp/always_restart.txt` is present in the
+  # application root, and set the pool's `runOnce` option
+  # accordingly. Invoke `callback` when the existence check has
+  # finished. (Multiple calls to this method are aggregated.)
+  setPoolRunOnceFlag: (callback) ->
+    # Only the first caller starts the check; later callers just queue up.
+    unless @statCallbacks.length
+      exists join(@root, "tmp/always_restart.txt"), (alwaysRestart) =>
+        @pool.runOnce = alwaysRestart
+        statCallback() for statCallback in @statCallbacks
+        @statCallbacks = []
+
+    @statCallbacks.push callback
+
+  # Collect environment variables from `.powrc`, `.envrc`, and `.powenv`,
+  # in that order, if present.
The idea is that `.powrc` files can be checked
+  # into a source code repository for global configuration, leaving
+  # `.powenv` free for any necessary local overrides.
+  loadScriptEnvironment: (env, callback) ->
+    # Each script that exists is sourced on top of the environment
+    # accumulated so far; missing scripts pass it through unchanged.
+    async.reduce [".powrc", ".envrc", ".powenv"], env, (env, filename, callback) =>
+      exists script = join(@root, filename), (scriptExists) ->
+        if scriptExists
+          sourceScriptEnv script, env, callback
+        else
+          callback null, env
+    , callback
+
+  # If `.rvmrc` and `$HOME/.rvm/scripts/rvm` are present, load rvm,
+  # source `.rvmrc`, and invoke `callback` with the resulting
+  # environment variables. If `.rvmrc` is present but rvm is not
+  # installed, invoke `callback` without sourcing `.rvmrc`.
+  # Before loading rvm, Pow invokes a helper script that shows a
+  # deprecation notice if it has not yet been displayed.
+  loadRvmEnvironment: (env, callback) ->
+    exists script = join(@root, ".rvmrc"), (rvmrcExists) =>
+      if rvmrcExists
+        # The rvm script location comes from the configuration's `rvmPath`.
+        exists rvm = @configuration.rvmPath, (rvmExists) =>
+          if rvmExists
+            libexecPath = resolve "#{__dirname}/../libexec"
+            # Shell snippet passed as the `before` option when sourcing `.rvmrc`.
+            before = """
+              '#{libexecPath}/pow_rvm_deprecation_notice' '#{[@firstHost]}'
+              source '#{rvm}' > /dev/null
+            """.trim()
+            sourceScriptEnv script, env, {before}, callback
+          else
+            callback null, env
+      else
+        callback null, env
+
+  # Stat `tmp/restart.txt` to cache its mtime, then load the
+  # application's full environment from `.powrc`, `.envrc`, `.powenv`,
+  # and `.rvmrc`.
+  loadEnvironment: (callback) ->
+    @queryRestartFile =>
+      @loadScriptEnvironment @configuration.env, (err, env) =>
+        if err then callback err
+        else @loadRvmEnvironment env, (err, env) =>
+          if err then callback err
+          else callback null, env
+
+  # Begin the initialization process if the application is in the
+  # uninitialized state. (If the application is terminating, queue a
+  # call to `initialize` after all workers have exited.)
+  initialize: ->
+    # Any existing state means initialization is already under way or done.
+    if @state
+      if @state is "terminating"
+        @quit => @initialize()
+      return
+
+    @state = "initializing"
+
+    # Load the application's environment. If an error is raised or
+    # either of the environment scripts exits with a non-zero status,
+    # reset the application's state and log the error.
+    @loadEnvironment (err, env) =>
+      if err
+        @state = null
+        @logger.error err.message
+        @logger.error "stdout: #{err.stdout}"
+        @logger.error "stderr: #{err.stderr}"
+
+      # Set the application's state to ready. Then create the Nack
+      # pool instance using the `workers` and `timeout` options from
+      # the application's environment or the global configuration.
+      else
+        @state = "ready"
+
+        # `POW_WORKERS` / `POW_TIMEOUT` from the environment take precedence
+        # over the configuration; the timeout is multiplied by 1000 for `idle`.
+        @pool = nack.createPool join(@root, "config.ru"),
+          env: env
+          size: env?.POW_WORKERS ? @configuration.workers
+          idle: (env?.POW_TIMEOUT ? @configuration.timeout) * 1000
+
+        # Log the workers' stderr and stdout, and log each worker's
+        # PID as it spawns and exits.
+        bufferLines @pool.stdout, (line) => @logger.info line
+        bufferLines @pool.stderr, (line) => @logger.warning line
+
+        @pool.on "worker:spawn", (process) =>
+          @logger.debug "nack worker #{process.child.pid} spawned"
+
+        @pool.on "worker:exit", (process) =>
+          @logger.debug "nack worker exited"
+
+      # Invoke and remove all queued callbacks, passing along the
+      # error, if any.
+      readyCallback err for readyCallback in @readyCallbacks
+      @readyCallbacks = []
+
+  # Begin the termination process. (If the application is initializing,
+  # wait until it is ready before shutting down.)
+  terminate: ->
+    if @state is "initializing"
+      @ready => @terminate()
+
+    else if @state is "ready"
+      @state = "terminating"
+
+      # Instruct all workers to exit. After the processes have
+      # terminated, reset the application's state, then invoke and
+      # remove all queued callbacks.
+      @pool.quit =>
+        @state = null
+        @mtime = null
+        @pool = null
+
+        quitCallback() for quitCallback in @quitCallbacks
+        @quitCallbacks = []
+
+  # Handle an incoming HTTP request. Wait until the application is in
+  # the ready state, restart the workers if necessary, then pass the
+  # request along to the Nack pool. If the Nack worker raises an
+  # exception handling the request, reset the application.
+  handle: (req, res, next, callback) ->
+    # `pause` returns the function that is called in `finally` below.
+    resume = pause req
+    @ready (err) =>
+      return next err if err
+      @setPoolRunOnceFlag =>
+        @restartIfNecessary =>
+          req.proxyMetaVariables =
+            SERVER_PORT: @configuration.dstPort.toString()
+          try
+            @pool.proxy req, res, (err) =>
+              @quit() if err
+              next err
+          finally
+            resume()
+            callback?()
+
+  # Terminate the application, re-initialize it, and invoke the given
+  # callback when the application's state becomes ready.
+  restart: (callback) ->
+    @quit =>
+      @ready callback
+
+  # Restart the application if `tmp/restart.txt` has been touched
+  # since the last call to this function.
+  restartIfNecessary: (callback) ->
+    @queryRestartFile (mtimeChanged) =>
+      if mtimeChanged
+        @restart callback
+      else
+        callback()
+
+  # Append RVM autoload boilerplate to the application's `.powrc`
+  # file. This is called by the RVM deprecation notice mini-app.
+  writeRvmBoilerplate: ->
+    powrc = join @root, ".powrc"
+    boilerplate = @constructor.rvmBoilerplate
+
+    fs.readFile powrc, "utf8", (err, contents) ->
+      # A failed read (e.g. no `.powrc` yet) is treated as an empty file.
+      contents ?= ""
+      # Write only when the boilerplate is not already in the file; it is
+      # placed in front of the existing contents.
+      if contents.indexOf(boilerplate) is -1
+        fs.writeFile powrc, "#{boilerplate}\n#{contents}"
+
+  # Shell snippet that sources rvm and `.rvmrc` when both files exist.
+  @rvmBoilerplate: """
+    if [ -f "$rvm_path/scripts/rvm" ] && [ -f ".rvmrc" ]; then
+      source "$rvm_path/scripts/rvm"
+      source ".rvmrc"
+    fi
+  """
diff --git a/test/fixtures/coffee/xipd.coffee b/test/fixtures/coffee/xipd.coffee
new file mode 100644
index 00000000..8b49f1ff
--- /dev/null
+++ b/test/fixtures/coffee/xipd.coffee
@@ -0,0 +1,110 @@
+dnsserver = require "dnsserver"
+
+# DNS server that answers A queries for names under `domain` with the address
+# derived from the subdomain (see the `Subdomain` classes below).
+exports.Server = class Server extends dnsserver.Server
+  # Record type, class and response code numbers used in this file.
+  NS_T_A = 1
+  NS_T_NS = 2
+  NS_T_CNAME = 5
+  NS_T_SOA = 6
+  NS_C_IN = 1
+  NS_RCODE_NXDOMAIN = 3
+
+  constructor: (domain, @rootAddress) ->
+    super
+    @domain = domain.toLowerCase()
+    @soa = createSOA @domain
+    @on "request", @handleRequest
+
+  # Answer one request: an A record for any recognised name, the SOA record
+  # for an NS query on the bare domain, NXDOMAIN for everything else.
+  handleRequest: (req, res) =>
+    question = req.question
+    subdomain = @extractSubdomain question.name
+
+    if subdomain? and isARequest question
+      res.addRR question.name, NS_T_A, NS_C_IN, 600, subdomain.getAddress()
+    else if subdomain?.isEmpty() and isNSRequest question
+      res.addRR question.name, NS_T_SOA, NS_C_IN, 600, @soa, true
+    else
+      res.header.rcode = NS_RCODE_NXDOMAIN
+
+    res.send()
+
+  extractSubdomain: (name) ->
+    Subdomain.extract name, @domain, @rootAddress
+
+  isARequest = (question) ->
+    question.type is NS_T_A and question.class is NS_C_IN
+
+  isNSRequest = (question) ->
+    question.type is NS_T_NS and question.class is NS_C_IN
+
+  # Build the SOA record for `domain`; the serial is the current Unix time
+  # in seconds.
+  createSOA = (domain) ->
+    mname = "ns-1.#{domain}"
+    rname = "hostmaster.#{domain}"
+    serial = parseInt new Date().getTime() / 1000
+    refresh = 28800
+    retry = 7200
+    expire = 604800
+    minimum = 3600
+    dnsserver.createSOA mname, rname, serial, refresh, retry, expire, minimum
+
+exports.createServer = (domain, address = "127.0.0.1") ->
+  new Server domain, address
+
+# A name inside the served domain. The base class resolves to the fallback
+# `address`; subclasses derive the address from the labels.
+exports.Subdomain = class Subdomain
+  # Returns a Subdomain (or subclass) instance when `name` ends with
+  # `domain`, and undefined otherwise.
+  @extract: (name, domain, address) ->
+    return unless name
+    name = name.toLowerCase()
+    offset = name.length - domain.length
+
+    if domain is name.slice offset
+      # `offset - 1` also drops the character just before the domain.
+      subdomain = if 0 >= offset then null else name.slice 0, offset - 1
+      new constructor subdomain, address if constructor = @for subdomain
+
+  # Pick the class whose pattern matches `subdomain`; IP-address form is
+  # tested before the encoded form.
+  @for: (subdomain = "") ->
+    if IPAddressSubdomain.pattern.test subdomain
+      IPAddressSubdomain
+    else if EncodedSubdomain.pattern.test subdomain
+      EncodedSubdomain
+    else
+      Subdomain
+
+  constructor: (@subdomain, @address) ->
+    @labels = subdomain?.split(".") ? []
+    @length = @labels.length
+
+  isEmpty: ->
+    @length is 0
+
+  getAddress: ->
+    @address
+
+# Subdomain ending in a dotted-quad IPv4 address.
+class IPAddressSubdomain extends Subdomain
+  @pattern = /// (^|\.)
+    ((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}
+    (25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)
+  $ ///
+
+  # The address is the last four labels joined with dots.
+  getAddress: ->
+    @labels.slice(-4).join "."
+
+# Subdomain whose last label is a base-36 encoded IPv4 address.
+class EncodedSubdomain extends Subdomain
+  @pattern = /(^|\.)[a-z0-9]{1,7}$/
+
+  getAddress: ->
+    decode @labels[@length - 1]
+
+# Encode a dotted-quad string as a base-36 number; the first octet occupies
+# the lowest byte.
+exports.encode = encode = (ip) ->
+  value = 0
+  for byte, index in ip.split "."
+    value += parseInt(byte, 10) << (index * 8)
+  # `>>> 0` reinterprets the sum as unsigned before formatting.
+  (value >>> 0).toString 36
+
+PATTERN = /^[a-z0-9]{1,7}$/
+
+# Inverse of `encode`. Returns undefined when `string` does not match PATTERN.
+exports.decode = decode = (string) ->
+  return unless PATTERN.test string
+  value = parseInt string, 36
+  ip = []
+  # Peel off four bytes, lowest first.
+  for i in [1..4]
+    ip.push value & 0xFF
+    value >>= 8
+  ip.join "."
diff --git a/test/fixtures/cpp/env.cpp b/test/fixtures/cpp/env.cpp
new file mode 100644
index 00000000..beddc19e
--- /dev/null
+++ b/test/fixtures/cpp/env.cpp
@@ -0,0 +1,81 @@
+/*
+  This file is part of the PhantomJS project from Ofi Labs.
+
+  Copyright (C) 2012 execjosh, http://execjosh.blogspot.com
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the <organization> nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED.
IN NO EVENT SHALL BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "env.h" + +#include +#include +#include + +static Env *env_instance = (Env *)NULL; + +Env *Env::instance() +{ + if ((Env *)NULL == env_instance) + env_instance = new Env(); + + return env_instance; +} + +Env::Env() + : QObject(QCoreApplication::instance()) +{ +} + +// public: + +void Env::parse(const char **envp) +{ + const char **env = (const char **)NULL; + QString envvar, name, value; + int indexOfEquals; + // Loop for each of the = pairs and split them into a map + for (env = envp; *env != (const char *)NULL; env++) { + envvar = QString(*env); + indexOfEquals = envvar.indexOf('='); + if (0 >= indexOfEquals) { + // Should never happen because names cannot contain "=" and cannot + // be empty. If it does happen, then just ignore this record. + // See: http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap08.html + continue; + } + // Extract name and value (if it exists) from envvar + // NOTE: + // QString::mid() will gracefully return an empty QString when the + // specified position index is >= the length() of the string + name = envvar.left(indexOfEquals); + value = envvar.mid(indexOfEquals + 1); + m_map.insert(name, value); + } +} + +QVariantMap Env::asVariantMap() const +{ + return m_map; +} diff --git a/test/fixtures/cpp/env.h b/test/fixtures/cpp/env.h new file mode 100644 index 00000000..6d72b39b --- /dev/null +++ b/test/fixtures/cpp/env.h @@ -0,0 +1,52 @@ +/* + This file is part of the PhantomJS project from Ofi Labs. 
+
+  Copyright (C) 2012 execjosh, http://execjosh.blogspot.com
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    * Neither the name of the <organization> nor the
+      names of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+  ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/ + +#ifndef ENV_H +#define ENV_H + +#include +#include + +class Env : public QObject +{ + Q_OBJECT + +public: + static Env *instance(); + + void parse(const char ** envp); + QVariantMap asVariantMap() const; + +private: + Env(); + + QVariantMap m_map; +}; + +#endif // ENV_H diff --git a/test/fixtures/cpp/key.cpp b/test/fixtures/cpp/key.cpp new file mode 100644 index 00000000..57ab842b --- /dev/null +++ b/test/fixtures/cpp/key.cpp @@ -0,0 +1,382 @@ +// Copyright (c) 2009-2012 The Bitcoin developers +// Distributed under the MIT/X11 software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. + +#include + +#include +#include + +#include "key.h" + +// Generate a private key from just the secret parameter +int EC_KEY_regenerate_key(EC_KEY *eckey, BIGNUM *priv_key) +{ + int ok = 0; + BN_CTX *ctx = NULL; + EC_POINT *pub_key = NULL; + + if (!eckey) return 0; + + const EC_GROUP *group = EC_KEY_get0_group(eckey); + + if ((ctx = BN_CTX_new()) == NULL) + goto err; + + pub_key = EC_POINT_new(group); + + if (pub_key == NULL) + goto err; + + if (!EC_POINT_mul(group, pub_key, priv_key, NULL, NULL, ctx)) + goto err; + + EC_KEY_set_private_key(eckey,priv_key); + EC_KEY_set_public_key(eckey,pub_key); + + ok = 1; + +err: + + if (pub_key) + EC_POINT_free(pub_key); + if (ctx != NULL) + BN_CTX_free(ctx); + + return(ok); +} + +// Perform ECDSA key recovery (see SEC1 4.1.6) for curves over (mod p)-fields +// recid selects which key is recovered +// if check is nonzero, additional checks are performed +int ECDSA_SIG_recover_key_GFp(EC_KEY *eckey, ECDSA_SIG *ecsig, const unsigned char *msg, int msglen, int recid, int check) +{ + if (!eckey) return 0; + + int ret = 0; + BN_CTX *ctx = NULL; + + BIGNUM *x = NULL; + BIGNUM *e = NULL; + BIGNUM *order = NULL; + BIGNUM *sor = NULL; + BIGNUM *eor = NULL; + BIGNUM *field = NULL; + EC_POINT *R = NULL; + EC_POINT *O = NULL; + EC_POINT *Q = NULL; + BIGNUM *rr = NULL; + BIGNUM *zero = NULL; + int n = 
0; + int i = recid / 2; + + const EC_GROUP *group = EC_KEY_get0_group(eckey); + if ((ctx = BN_CTX_new()) == NULL) { ret = -1; goto err; } + BN_CTX_start(ctx); + order = BN_CTX_get(ctx); + if (!EC_GROUP_get_order(group, order, ctx)) { ret = -2; goto err; } + x = BN_CTX_get(ctx); + if (!BN_copy(x, order)) { ret=-1; goto err; } + if (!BN_mul_word(x, i)) { ret=-1; goto err; } + if (!BN_add(x, x, ecsig->r)) { ret=-1; goto err; } + field = BN_CTX_get(ctx); + if (!EC_GROUP_get_curve_GFp(group, field, NULL, NULL, ctx)) { ret=-2; goto err; } + if (BN_cmp(x, field) >= 0) { ret=0; goto err; } + if ((R = EC_POINT_new(group)) == NULL) { ret = -2; goto err; } + if (!EC_POINT_set_compressed_coordinates_GFp(group, R, x, recid % 2, ctx)) { ret=0; goto err; } + if (check) + { + if ((O = EC_POINT_new(group)) == NULL) { ret = -2; goto err; } + if (!EC_POINT_mul(group, O, NULL, R, order, ctx)) { ret=-2; goto err; } + if (!EC_POINT_is_at_infinity(group, O)) { ret = 0; goto err; } + } + if ((Q = EC_POINT_new(group)) == NULL) { ret = -2; goto err; } + n = EC_GROUP_get_degree(group); + e = BN_CTX_get(ctx); + if (!BN_bin2bn(msg, msglen, e)) { ret=-1; goto err; } + if (8*msglen > n) BN_rshift(e, e, 8-(n & 7)); + zero = BN_CTX_get(ctx); + if (!BN_zero(zero)) { ret=-1; goto err; } + if (!BN_mod_sub(e, zero, e, order, ctx)) { ret=-1; goto err; } + rr = BN_CTX_get(ctx); + if (!BN_mod_inverse(rr, ecsig->r, order, ctx)) { ret=-1; goto err; } + sor = BN_CTX_get(ctx); + if (!BN_mod_mul(sor, ecsig->s, rr, order, ctx)) { ret=-1; goto err; } + eor = BN_CTX_get(ctx); + if (!BN_mod_mul(eor, e, rr, order, ctx)) { ret=-1; goto err; } + if (!EC_POINT_mul(group, Q, eor, R, sor, ctx)) { ret=-2; goto err; } + if (!EC_KEY_set_public_key(eckey, Q)) { ret=-2; goto err; } + + ret = 1; + +err: + if (ctx) { + BN_CTX_end(ctx); + BN_CTX_free(ctx); + } + if (R != NULL) EC_POINT_free(R); + if (O != NULL) EC_POINT_free(O); + if (Q != NULL) EC_POINT_free(Q); + return ret; +} + +void CKey::SetCompressedPubKey() +{ + 
EC_KEY_set_conv_form(pkey, POINT_CONVERSION_COMPRESSED); + fCompressedPubKey = true; +} + +void CKey::Reset() +{ + fCompressedPubKey = false; + pkey = EC_KEY_new_by_curve_name(NID_secp256k1); + if (pkey == NULL) + throw key_error("CKey::CKey() : EC_KEY_new_by_curve_name failed"); + fSet = false; +} + +CKey::CKey() +{ + Reset(); +} + +CKey::CKey(const CKey& b) +{ + pkey = EC_KEY_dup(b.pkey); + if (pkey == NULL) + throw key_error("CKey::CKey(const CKey&) : EC_KEY_dup failed"); + fSet = b.fSet; +} + +CKey& CKey::operator=(const CKey& b) +{ + if (!EC_KEY_copy(pkey, b.pkey)) + throw key_error("CKey::operator=(const CKey&) : EC_KEY_copy failed"); + fSet = b.fSet; + return (*this); +} + +CKey::~CKey() +{ + EC_KEY_free(pkey); +} + +bool CKey::IsNull() const +{ + return !fSet; +} + +bool CKey::IsCompressed() const +{ + return fCompressedPubKey; +} + +void CKey::MakeNewKey(bool fCompressed) +{ + if (!EC_KEY_generate_key(pkey)) + throw key_error("CKey::MakeNewKey() : EC_KEY_generate_key failed"); + if (fCompressed) + SetCompressedPubKey(); + fSet = true; +} + +bool CKey::SetPrivKey(const CPrivKey& vchPrivKey) +{ + const unsigned char* pbegin = &vchPrivKey[0]; + if (!d2i_ECPrivateKey(&pkey, &pbegin, vchPrivKey.size())) + return false; + fSet = true; + return true; +} + +bool CKey::SetSecret(const CSecret& vchSecret, bool fCompressed) +{ + EC_KEY_free(pkey); + pkey = EC_KEY_new_by_curve_name(NID_secp256k1); + if (pkey == NULL) + throw key_error("CKey::SetSecret() : EC_KEY_new_by_curve_name failed"); + if (vchSecret.size() != 32) + throw key_error("CKey::SetSecret() : secret must be 32 bytes"); + BIGNUM *bn = BN_bin2bn(&vchSecret[0],32,BN_new()); + if (bn == NULL) + throw key_error("CKey::SetSecret() : BN_bin2bn failed"); + if (!EC_KEY_regenerate_key(pkey,bn)) + { + BN_clear_free(bn); + throw key_error("CKey::SetSecret() : EC_KEY_regenerate_key failed"); + } + BN_clear_free(bn); + fSet = true; + if (fCompressed || fCompressedPubKey) + SetCompressedPubKey(); + return true; +} + 
+// Return the private scalar as exactly 32 big-endian bytes, left-padded with
+// zeros. fCompressed receives the key's compression flag.
+// Throws key_error if there is no private key or it does not fit in 32 bytes.
+CSecret CKey::GetSecret(bool &fCompressed) const
+{
+    CSecret vchRet;
+    vchRet.resize(32);
+    const BIGNUM *bn = EC_KEY_get0_private_key(pkey);
+    // Check for a missing private key before bn is dereferenced.
+    if (bn == NULL)
+        throw key_error("CKey::GetSecret() : EC_KEY_get0_private_key failed");
+    int nBytes = BN_num_bytes(bn);
+    // A value wider than the buffer would make the write start before it.
+    if (nBytes > 32)
+        throw key_error("CKey::GetSecret() : private key is larger than 32 bytes");
+    // Pointer arithmetic keeps the nBytes == 0 case (one-past-the-end) valid.
+    int n=BN_bn2bin(bn,&vchRet[0] + (32 - nBytes));
+    if (n != nBytes)
+        throw key_error("CKey::GetSecret(): BN_bn2bin failed");
+    fCompressed = fCompressedPubKey;
+    return vchRet;
+}
+
+// Return the DER-serialized private key.
+// Throws key_error if serialization fails.
+CPrivKey CKey::GetPrivKey() const
+{
+    int nSize = i2d_ECPrivateKey(pkey, NULL);
+    // A failed call must not be used as a (huge) vector size.
+    if (nSize <= 0)
+        throw key_error("CKey::GetPrivKey() : i2d_ECPrivateKey failed");
+    CPrivKey vchPrivKey(nSize, 0);
+    unsigned char* pbegin = &vchPrivKey[0];
+    if (i2d_ECPrivateKey(pkey, &pbegin) != nSize)
+        throw key_error("CKey::GetPrivKey() : i2d_ECPrivateKey returned unexpected size");
+    return vchPrivKey;
+}
+
+// Load a serialized public key. A 33-byte encoding marks the key compressed.
+// Returns false if the bytes cannot be decoded.
+bool CKey::SetPubKey(const CPubKey& vchPubKey)
+{
+    // Taking &v[0] of an empty vector is undefined; there is nothing to parse.
+    if (vchPubKey.vchPubKey.empty())
+        return false;
+    const unsigned char* pbegin = &vchPubKey.vchPubKey[0];
+    if (!o2i_ECPublicKey(&pkey, &pbegin, vchPubKey.vchPubKey.size()))
+        return false;
+    fSet = true;
+    if (vchPubKey.vchPubKey.size() == 33)
+        SetCompressedPubKey();
+    return true;
+}
+
+// Return the serialized public key.
+// Throws key_error if serialization fails.
+CPubKey CKey::GetPubKey() const
+{
+    int nSize = i2o_ECPublicKey(pkey, NULL);
+    if (nSize <= 0)
+        throw key_error("CKey::GetPubKey() : i2o_ECPublicKey failed");
+    std::vector<unsigned char> vchPubKey(nSize, 0);
+    unsigned char* pbegin = &vchPubKey[0];
+    if (i2o_ECPublicKey(pkey, &pbegin) != nSize)
+        throw key_error("CKey::GetPubKey() : i2o_ECPublicKey returned unexpected size");
+    return CPubKey(vchPubKey);
+}
+
+// Sign hash and store the signature in vchSig.
+// Returns false, leaving vchSig empty, if signing fails.
+bool CKey::Sign(uint256 hash, std::vector<unsigned char>& vchSig)
+{
+    unsigned int nSize = ECDSA_size(pkey);
+    // A zero size means the key cannot sign; avoid indexing an empty vector.
+    if (nSize == 0)
+    {
+        vchSig.clear();
+        return false;
+    }
+    vchSig.resize(nSize); // Make sure it is big enough
+    if (!ECDSA_sign(0, (unsigned char*)&hash, sizeof(hash), &vchSig[0], &nSize, pkey))
+    {
+        vchSig.clear();
+        return false;
+    }
+    vchSig.resize(nSize); // Shrink to fit actual size
+    return true;
+}
+
+// create a compact signature (65 bytes), which allows reconstructing the used public key
+// The format is one header byte, followed by two times 32 bytes for the serialized r and s values.
+// The header byte: 0x1B = first key with even y, 0x1C = first key with odd y,
+//                  0x1D = second key with even y, 0x1E = second key with odd y
+// 4 is added to the header byte when the key is compressed.
+// Returns false if signing fails or r/s exceed 256 bits; throws key_error if
+// no recovery id reproduces this key's public key.
+bool CKey::SignCompact(uint256 hash, std::vector<unsigned char>& vchSig)
+{
+    bool fOk = false;
+    ECDSA_SIG *sig = ECDSA_do_sign((unsigned char*)&hash, sizeof(hash), pkey);
+    if (sig==NULL)
+        return false;
+    vchSig.clear();
+    vchSig.resize(65,0);
+    int nBitsR = BN_num_bits(sig->r);
+    int nBitsS = BN_num_bits(sig->s);
+    if (nBitsR <= 256 && nBitsS <= 256)
+    {
+        int nRecId = -1;
+        try
+        {
+            // Try each recovery id until one yields our own public key.
+            for (int i=0; i<4; i++)
+            {
+                CKey keyRec;
+                keyRec.fSet = true;
+                if (fCompressedPubKey)
+                    keyRec.SetCompressedPubKey();
+                if (ECDSA_SIG_recover_key_GFp(keyRec.pkey, sig, (unsigned char*)&hash, sizeof(hash), i, 1) == 1)
+                    if (keyRec.GetPubKey() == this->GetPubKey())
+                    {
+                        nRecId = i;
+                        break;
+                    }
+            }
+        }
+        catch (...)
+        {
+            // CKey construction and GetPubKey() can throw; sig is a raw
+            // OpenSSL object and would otherwise be leaked.
+            ECDSA_SIG_free(sig);
+            throw;
+        }
+
+        if (nRecId == -1)
+        {
+            ECDSA_SIG_free(sig);
+            throw key_error("CKey::SignCompact() : unable to construct recoverable key");
+        }
+
+        vchSig[0] = nRecId+27+(fCompressedPubKey ? 4 : 0);
+        // Right-align r in bytes 1..32 and s in bytes 33..64.
+        BN_bn2bin(sig->r,&vchSig[0] + (33-(nBitsR+7)/8));
+        BN_bn2bin(sig->s,&vchSig[0] + (65-(nBitsS+7)/8));
+        fOk = true;
+    }
+    ECDSA_SIG_free(sig);
+    return fOk;
+}
+
+// reconstruct public key from a compact signature
+// This is only slightly more CPU intensive than just verifying it.
+// If this function succeeds, the recovered public key is guaranteed to be valid +// (the signature is a valid signature of the given data for that key) +bool CKey::SetCompactSignature(uint256 hash, const std::vector& vchSig) +{ + if (vchSig.size() != 65) + return false; + int nV = vchSig[0]; + if (nV<27 || nV>=35) + return false; + ECDSA_SIG *sig = ECDSA_SIG_new(); + BN_bin2bn(&vchSig[1],32,sig->r); + BN_bin2bn(&vchSig[33],32,sig->s); + + EC_KEY_free(pkey); + pkey = EC_KEY_new_by_curve_name(NID_secp256k1); + if (nV >= 31) + { + SetCompressedPubKey(); + nV -= 4; + } + if (ECDSA_SIG_recover_key_GFp(pkey, sig, (unsigned char*)&hash, sizeof(hash), nV - 27, 0) == 1) + { + fSet = true; + ECDSA_SIG_free(sig); + return true; + } + return false; +} + +bool CKey::Verify(uint256 hash, const std::vector& vchSig) +{ + // -1 = error, 0 = bad sig, 1 = good + if (ECDSA_verify(0, (unsigned char*)&hash, sizeof(hash), &vchSig[0], vchSig.size(), pkey) != 1) + return false; + + return true; +} + +bool CKey::VerifyCompact(uint256 hash, const std::vector& vchSig) +{ + CKey key; + if (!key.SetCompactSignature(hash, vchSig)) + return false; + if (GetPubKey() != key.GetPubKey()) + return false; + + return true; +} + +bool CKey::IsValid() +{ + if (!fSet) + return false; + + bool fCompr; + CSecret secret = GetSecret(fCompr); + CKey key2; + key2.SetSecret(secret, fCompr); + return GetPubKey() == key2.GetPubKey(); +} diff --git a/test/fixtures/cpp/key.h b/test/fixtures/cpp/key.h new file mode 100644 index 00000000..945c4998 --- /dev/null +++ b/test/fixtures/cpp/key.h @@ -0,0 +1,162 @@ +// Copyright (c) 2009-2010 Satoshi Nakamoto +// Copyright (c) 2009-2012 The Bitcoin developers +// Distributed under the MIT/X11 software license, see the accompanying +// file COPYING or http://www.opensource.org/licenses/mit-license.php. 
+#ifndef BITCOIN_KEY_H +#define BITCOIN_KEY_H + +#include +#include + +#include "allocators.h" +#include "serialize.h" +#include "uint256.h" +#include "util.h" + +#include // for EC_KEY definition + +// secp160k1 +// const unsigned int PRIVATE_KEY_SIZE = 192; +// const unsigned int PUBLIC_KEY_SIZE = 41; +// const unsigned int SIGNATURE_SIZE = 48; +// +// secp192k1 +// const unsigned int PRIVATE_KEY_SIZE = 222; +// const unsigned int PUBLIC_KEY_SIZE = 49; +// const unsigned int SIGNATURE_SIZE = 57; +// +// secp224k1 +// const unsigned int PRIVATE_KEY_SIZE = 250; +// const unsigned int PUBLIC_KEY_SIZE = 57; +// const unsigned int SIGNATURE_SIZE = 66; +// +// secp256k1: +// const unsigned int PRIVATE_KEY_SIZE = 279; +// const unsigned int PUBLIC_KEY_SIZE = 65; +// const unsigned int SIGNATURE_SIZE = 72; +// +// see www.keylength.com +// script supports up to 75 for single byte push + +class key_error : public std::runtime_error +{ +public: + explicit key_error(const std::string& str) : std::runtime_error(str) {} +}; + +/** A reference to a CKey: the Hash160 of its serialized public key */ +class CKeyID : public uint160 +{ +public: + CKeyID() : uint160(0) { } + CKeyID(const uint160 &in) : uint160(in) { } +}; + +/** A reference to a CScript: the Hash160 of its serialization (see script.h) */ +class CScriptID : public uint160 +{ +public: + CScriptID() : uint160(0) { } + CScriptID(const uint160 &in) : uint160(in) { } +}; + +/** An encapsulated public key. 
*/ +class CPubKey { +private: + std::vector vchPubKey; + friend class CKey; + +public: + CPubKey() { } + CPubKey(const std::vector &vchPubKeyIn) : vchPubKey(vchPubKeyIn) { } + friend bool operator==(const CPubKey &a, const CPubKey &b) { return a.vchPubKey == b.vchPubKey; } + friend bool operator!=(const CPubKey &a, const CPubKey &b) { return a.vchPubKey != b.vchPubKey; } + friend bool operator<(const CPubKey &a, const CPubKey &b) { return a.vchPubKey < b.vchPubKey; } + + IMPLEMENT_SERIALIZE( + READWRITE(vchPubKey); + ) + + CKeyID GetID() const { + return CKeyID(Hash160(vchPubKey)); + } + + uint256 GetHash() const { + return Hash(vchPubKey.begin(), vchPubKey.end()); + } + + bool IsValid() const { + return vchPubKey.size() == 33 || vchPubKey.size() == 65; + } + + bool IsCompressed() const { + return vchPubKey.size() == 33; + } + + std::vector Raw() const { + return vchPubKey; + } +}; + + +// secure_allocator is defined in serialize.h +// CPrivKey is a serialized private key, with all parameters included (279 bytes) +typedef std::vector > CPrivKey; +// CSecret is a serialization of just the secret parameter (32 bytes) +typedef std::vector > CSecret; + +/** An encapsulated OpenSSL Elliptic Curve key (public and/or private) */ +class CKey +{ +protected: + EC_KEY* pkey; + bool fSet; + bool fCompressedPubKey; + + void SetCompressedPubKey(); + +public: + + void Reset(); + + CKey(); + CKey(const CKey& b); + + CKey& operator=(const CKey& b); + + ~CKey(); + + bool IsNull() const; + bool IsCompressed() const; + + void MakeNewKey(bool fCompressed); + bool SetPrivKey(const CPrivKey& vchPrivKey); + bool SetSecret(const CSecret& vchSecret, bool fCompressed = false); + CSecret GetSecret(bool &fCompressed) const; + CPrivKey GetPrivKey() const; + bool SetPubKey(const CPubKey& vchPubKey); + CPubKey GetPubKey() const; + + bool Sign(uint256 hash, std::vector& vchSig); + + // create a compact signature (65 bytes), which allows reconstructing the used public key + // The format is one 
header byte, followed by two times 32 bytes for the serialized r and s values. + // The header byte: 0x1B = first key with even y, 0x1C = first key with odd y, + // 0x1D = second key with even y, 0x1E = second key with odd y + bool SignCompact(uint256 hash, std::vector& vchSig); + + // reconstruct public key from a compact signature + // This is only slightly more CPU intensive than just verifying it. + // If this function succeeds, the recovered public key is guaranteed to be valid + // (the signature is a valid signature of the given data for that key) + bool SetCompactSignature(uint256 hash, const std::vector& vchSig); + + bool Verify(uint256 hash, const std::vector& vchSig); + + // Verify a compact signature + bool VerifyCompact(uint256 hash, const std::vector& vchSig); + + bool IsValid(); +}; + +#endif diff --git a/test/fixtures/cpp/main.cpp b/test/fixtures/cpp/main.cpp new file mode 100644 index 00000000..8ca481d1 --- /dev/null +++ b/test/fixtures/cpp/main.cpp @@ -0,0 +1,74 @@ +/* + This file is part of the PhantomJS project from Ofi Labs. + + Copyright (C) 2011 Ariya Hidayat + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "consts.h" +#include "utils.h" +#include "env.h" +#include "phantom.h" + +#ifdef Q_OS_LINUX +#include "client/linux/handler/exception_handler.h" +#endif + +#include + +#if QT_VERSION != QT_VERSION_CHECK(4, 8, 0) +#error Something is wrong with the setup. Please report to the mailing list! 
+#endif + +int main(int argc, char** argv, const char** envp) +{ +#ifdef Q_OS_LINUX + google_breakpad::ExceptionHandler eh("/tmp", NULL, Utils::exceptionHandler, NULL, true); +#endif + + // Registering an alternative Message Handler + qInstallMsgHandler(Utils::messageHandler); + + QApplication app(argc, argv); + +#ifdef STATIC_BUILD + Q_INIT_RESOURCE(WebKit); + Q_INIT_RESOURCE(InspectorBackendStub); +#endif + + app.setWindowIcon(QIcon(":/phantomjs-icon.png")); + app.setApplicationName("PhantomJS"); + app.setOrganizationName("Ofi Labs"); + app.setOrganizationDomain("www.ofilabs.com"); + app.setApplicationVersion(PHANTOMJS_VERSION_STRING); + + Env::instance()->parse(envp); + + Phantom phantom; + if (phantom.execute()) { + app.exec(); + } + return phantom.returnValue(); +} diff --git a/test/fixtures/cpp/scanner.cc b/test/fixtures/cpp/scanner.cc new file mode 100644 index 00000000..f24af2ed --- /dev/null +++ b/test/fixtures/cpp/scanner.cc @@ -0,0 +1,1088 @@ +// Copyright 2011 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Features shared by parsing and pre-parsing scanners. + +#include "scanner.h" + +#include "../include/v8stdint.h" +#include "char-predicates-inl.h" + +namespace v8 { +namespace internal { + +// ---------------------------------------------------------------------------- +// Scanner + +Scanner::Scanner(UnicodeCache* unicode_cache) + : unicode_cache_(unicode_cache), + octal_pos_(Location::invalid()), + harmony_scoping_(false), + harmony_modules_(false) { } + + +void Scanner::Initialize(Utf16CharacterStream* source) { + source_ = source; + // Need to capture identifiers in order to recognize "get" and "set" + // in object literals. + Init(); + // Skip initial whitespace allowing HTML comment ends just like + // after a newline and scan first token. 
+ has_line_terminator_before_next_ = true; + SkipWhiteSpace(); + Scan(); +} + + +uc32 Scanner::ScanHexNumber(int expected_length) { + ASSERT(expected_length <= 4); // prevent overflow + + uc32 digits[4] = { 0, 0, 0, 0 }; + uc32 x = 0; + for (int i = 0; i < expected_length; i++) { + digits[i] = c0_; + int d = HexValue(c0_); + if (d < 0) { + // According to ECMA-262, 3rd, 7.8.4, page 18, these hex escapes + // should be illegal, but other JS VMs just return the + // non-escaped version of the original character. + + // Push back digits that we have advanced past. + for (int j = i-1; j >= 0; j--) { + PushBack(digits[j]); + } + return -1; + } + x = x * 16 + d; + Advance(); + } + + return x; +} + + +// Ensure that tokens can be stored in a byte. +STATIC_ASSERT(Token::NUM_TOKENS <= 0x100); + +// Table of one-character tokens, by character (0x00..0x7f only). +static const byte one_char_tokens[] = { + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::LPAREN, // 0x28 + Token::RPAREN, // 0x29 + Token::ILLEGAL, + Token::ILLEGAL, + Token::COMMA, // 0x2c + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::COLON, // 0x3a + 
Token::SEMICOLON, // 0x3b + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::CONDITIONAL, // 0x3f + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::LBRACK, // 0x5b + Token::ILLEGAL, + Token::RBRACK, // 0x5d + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::ILLEGAL, + Token::LBRACE, // 0x7b + Token::ILLEGAL, + Token::RBRACE, // 0x7d + Token::BIT_NOT, // 0x7e + Token::ILLEGAL +}; + + +Token::Value Scanner::Next() { + current_ = next_; + has_line_terminator_before_next_ = false; + has_multiline_comment_before_next_ = false; + if (static_cast(c0_) <= 0x7f) { + Token::Value token = static_cast(one_char_tokens[c0_]); + if (token != Token::ILLEGAL) { + int pos = source_pos(); + next_.token = token; + next_.location.beg_pos = pos; + next_.location.end_pos = pos + 1; + Advance(); + return current_.token; + } + } + Scan(); + return current_.token; +} + + +static inline bool IsByteOrderMark(uc32 c) { + // The Unicode value U+FFFE is guaranteed never to be assigned as a + // Unicode character; this implies that in a Unicode context the + // 0xFF, 0xFE byte pattern can only 
be interpreted as the U+FEFF + // character expressed in little-endian byte order (since it could + // not be a U+FFFE character expressed in big-endian byte + // order). Nevertheless, we check for it to be compatible with + // Spidermonkey. + return c == 0xFEFF || c == 0xFFFE; +} + + +bool Scanner::SkipWhiteSpace() { + int start_position = source_pos(); + + while (true) { + // We treat byte-order marks (BOMs) as whitespace for better + // compatibility with Spidermonkey and other JavaScript engines. + while (unicode_cache_->IsWhiteSpace(c0_) || IsByteOrderMark(c0_)) { + // IsWhiteSpace() includes line terminators! + if (unicode_cache_->IsLineTerminator(c0_)) { + // Ignore line terminators, but remember them. This is necessary + // for automatic semicolon insertion. + has_line_terminator_before_next_ = true; + } + Advance(); + } + + // If there is an HTML comment end '-->' at the beginning of a + // line (with only whitespace in front of it), we treat the rest + // of the line as a comment. This is in line with the way + // SpiderMonkey handles it. + if (c0_ == '-' && has_line_terminator_before_next_) { + Advance(); + if (c0_ == '-') { + Advance(); + if (c0_ == '>') { + // Treat the rest of the line as a comment. + SkipSingleLineComment(); + // Continue skipping white space after the comment. + continue; + } + PushBack('-'); // undo Advance() + } + PushBack('-'); // undo Advance() + } + // Return whether or not we skipped any characters. + return source_pos() != start_position; + } +} + + +Token::Value Scanner::SkipSingleLineComment() { + Advance(); + + // The line terminator at the end of the line is not considered + // to be part of the single-line comment; it is recognized + // separately by the lexical grammar and becomes part of the + // stream of input elements for the syntactic grammar (see + // ECMA-262, section 7.4). 
+ while (c0_ >= 0 && !unicode_cache_->IsLineTerminator(c0_)) { + Advance(); + } + + return Token::WHITESPACE; +} + + +Token::Value Scanner::SkipMultiLineComment() { + ASSERT(c0_ == '*'); + Advance(); + + while (c0_ >= 0) { + uc32 ch = c0_; + Advance(); + if (unicode_cache_->IsLineTerminator(ch)) { + // Following ECMA-262, section 7.4, a comment containing + // a newline will make the comment count as a line-terminator. + has_multiline_comment_before_next_ = true; + } + // If we have reached the end of the multi-line comment, we + // consume the '/' and insert a whitespace. This way all + // multi-line comments are treated as whitespace. + if (ch == '*' && c0_ == '/') { + c0_ = ' '; + return Token::WHITESPACE; + } + } + + // Unterminated multi-line comment. + return Token::ILLEGAL; +} + + +Token::Value Scanner::ScanHtmlComment() { + // Check for -= + Advance(); + if (c0_ == '-') { + Advance(); + if (c0_ == '>' && has_line_terminator_before_next_) { + // For compatibility with SpiderMonkey, we skip lines that + // start with an HTML comment end '-->'. 
+ token = SkipSingleLineComment(); + } else { + token = Token::DEC; + } + } else if (c0_ == '=') { + token = Select(Token::ASSIGN_SUB); + } else { + token = Token::SUB; + } + break; + + case '*': + // * *= + token = Select('=', Token::ASSIGN_MUL, Token::MUL); + break; + + case '%': + // % %= + token = Select('=', Token::ASSIGN_MOD, Token::MOD); + break; + + case '/': + // / // /* /= + Advance(); + if (c0_ == '/') { + token = SkipSingleLineComment(); + } else if (c0_ == '*') { + token = SkipMultiLineComment(); + } else if (c0_ == '=') { + token = Select(Token::ASSIGN_DIV); + } else { + token = Token::DIV; + } + break; + + case '&': + // & && &= + Advance(); + if (c0_ == '&') { + token = Select(Token::AND); + } else if (c0_ == '=') { + token = Select(Token::ASSIGN_BIT_AND); + } else { + token = Token::BIT_AND; + } + break; + + case '|': + // | || |= + Advance(); + if (c0_ == '|') { + token = Select(Token::OR); + } else if (c0_ == '=') { + token = Select(Token::ASSIGN_BIT_OR); + } else { + token = Token::BIT_OR; + } + break; + + case '^': + // ^ ^= + token = Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR); + break; + + case '.': + // . 
Number + Advance(); + if (IsDecimalDigit(c0_)) { + token = ScanNumber(true); + } else { + token = Token::PERIOD; + } + break; + + case ':': + token = Select(Token::COLON); + break; + + case ';': + token = Select(Token::SEMICOLON); + break; + + case ',': + token = Select(Token::COMMA); + break; + + case '(': + token = Select(Token::LPAREN); + break; + + case ')': + token = Select(Token::RPAREN); + break; + + case '[': + token = Select(Token::LBRACK); + break; + + case ']': + token = Select(Token::RBRACK); + break; + + case '{': + token = Select(Token::LBRACE); + break; + + case '}': + token = Select(Token::RBRACE); + break; + + case '?': + token = Select(Token::CONDITIONAL); + break; + + case '~': + token = Select(Token::BIT_NOT); + break; + + default: + if (unicode_cache_->IsIdentifierStart(c0_)) { + token = ScanIdentifierOrKeyword(); + } else if (IsDecimalDigit(c0_)) { + token = ScanNumber(false); + } else if (SkipWhiteSpace()) { + token = Token::WHITESPACE; + } else if (c0_ < 0) { + token = Token::EOS; + } else { + token = Select(Token::ILLEGAL); + } + break; + } + + // Continue scanning for tokens as long as we're just skipping + // whitespace. + } while (token == Token::WHITESPACE); + + next_.location.end_pos = source_pos(); + next_.token = token; +} + + +void Scanner::SeekForward(int pos) { + // After this call, we will have the token at the given position as + // the "next" token. The "current" token will be invalid. + if (pos == next_.location.beg_pos) return; + int current_pos = source_pos(); + ASSERT_EQ(next_.location.end_pos, current_pos); + // Positions inside the lookahead token aren't supported. + ASSERT(pos >= current_pos); + if (pos != current_pos) { + source_->SeekForward(pos - source_->pos()); + Advance(); + // This function is only called to seek to the location + // of the end of a function (at the "}" token). It doesn't matter + // whether there was a line terminator in the part we skip. 
+ has_line_terminator_before_next_ = false; + has_multiline_comment_before_next_ = false; + } + Scan(); +} + + +bool Scanner::ScanEscape() { + uc32 c = c0_; + Advance(); + + // Skip escaped newlines. + if (unicode_cache_->IsLineTerminator(c)) { + // Allow CR+LF newlines in multiline string literals. + if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); + // Allow LF+CR newlines in multiline string literals. + if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); + return true; + } + + switch (c) { + case '\'': // fall through + case '"' : // fall through + case '\\': break; + case 'b' : c = '\b'; break; + case 'f' : c = '\f'; break; + case 'n' : c = '\n'; break; + case 'r' : c = '\r'; break; + case 't' : c = '\t'; break; + case 'u' : { + c = ScanHexNumber(4); + if (c < 0) return false; + break; + } + case 'v' : c = '\v'; break; + case 'x' : { + c = ScanHexNumber(2); + if (c < 0) return false; + break; + } + case '0' : // fall through + case '1' : // fall through + case '2' : // fall through + case '3' : // fall through + case '4' : // fall through + case '5' : // fall through + case '6' : // fall through + case '7' : c = ScanOctalEscape(c, 2); break; + } + + // According to ECMA-262, section 7.8.4, characters not covered by the + // above cases should be illegal, but they are commonly handled as + // non-escaped characters by JS VMs. + AddLiteralChar(c); + return true; +} + + +// Octal escapes of the forms '\0xx' and '\xxx' are not a part of +// ECMA-262. Other JS VMs support them. +uc32 Scanner::ScanOctalEscape(uc32 c, int length) { + uc32 x = c - '0'; + int i = 0; + for (; i < length; i++) { + int d = c0_ - '0'; + if (d < 0 || d > 7) break; + int nx = x * 8 + d; + if (nx >= 256) break; + x = nx; + Advance(); + } + // Anything except '\0' is an octal escape sequence, illegal in strict mode. + // Remember the position of octal escape sequences so that an error + // can be reported later (in strict mode). 
+ // We don't report the error immediately, because the octal escape can + // occur before the "use strict" directive. + if (c != '0' || i > 0) { + octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1); + } + return x; +} + + +Token::Value Scanner::ScanString() { + uc32 quote = c0_; + Advance(); // consume quote + + LiteralScope literal(this); + while (c0_ != quote && c0_ >= 0 + && !unicode_cache_->IsLineTerminator(c0_)) { + uc32 c = c0_; + Advance(); + if (c == '\\') { + if (c0_ < 0 || !ScanEscape()) return Token::ILLEGAL; + } else { + AddLiteralChar(c); + } + } + if (c0_ != quote) return Token::ILLEGAL; + literal.Complete(); + + Advance(); // consume quote + return Token::STRING; +} + + +void Scanner::ScanDecimalDigits() { + while (IsDecimalDigit(c0_)) + AddLiteralCharAdvance(); +} + + +Token::Value Scanner::ScanNumber(bool seen_period) { + ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction + + enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; + + LiteralScope literal(this); + if (seen_period) { + // we have already seen a decimal point of the float + AddLiteralChar('.'); + ScanDecimalDigits(); // we know we have at least one digit + + } else { + // if the first character is '0' we must check for octals and hex + if (c0_ == '0') { + int start_pos = source_pos(); // For reporting octal positions. + AddLiteralCharAdvance(); + + // either 0, 0exxx, 0Exxx, 0.xxx, an octal number, or a hex number + if (c0_ == 'x' || c0_ == 'X') { + // hex number + kind = HEX; + AddLiteralCharAdvance(); + if (!IsHexDigit(c0_)) { + // we must have at least one hex digit after 'x'/'X' + return Token::ILLEGAL; + } + while (IsHexDigit(c0_)) { + AddLiteralCharAdvance(); + } + } else if ('0' <= c0_ && c0_ <= '7') { + // (possible) octal number + kind = OCTAL; + while (true) { + if (c0_ == '8' || c0_ == '9') { + kind = DECIMAL; + break; + } + if (c0_ < '0' || '7' < c0_) { + // Octal literal finished. 
+ octal_pos_ = Location(start_pos, source_pos()); + break; + } + AddLiteralCharAdvance(); + } + } + } + + // Parse decimal digits and allow trailing fractional part. + if (kind == DECIMAL) { + ScanDecimalDigits(); // optional + if (c0_ == '.') { + AddLiteralCharAdvance(); + ScanDecimalDigits(); // optional + } + } + } + + // scan exponent, if any + if (c0_ == 'e' || c0_ == 'E') { + ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number + if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed + // scan exponent + AddLiteralCharAdvance(); + if (c0_ == '+' || c0_ == '-') + AddLiteralCharAdvance(); + if (!IsDecimalDigit(c0_)) { + // we must have at least one decimal digit after 'e'/'E' + return Token::ILLEGAL; + } + ScanDecimalDigits(); + } + + // The source character immediately following a numeric literal must + // not be an identifier start or a decimal digit; see ECMA-262 + // section 7.8.3, page 17 (note that we read only one decimal digit + // if the value is 0). 
+ if (IsDecimalDigit(c0_) || unicode_cache_->IsIdentifierStart(c0_)) + return Token::ILLEGAL; + + literal.Complete(); + + return Token::NUMBER; +} + + +uc32 Scanner::ScanIdentifierUnicodeEscape() { + Advance(); + if (c0_ != 'u') return -1; + Advance(); + uc32 result = ScanHexNumber(4); + if (result < 0) PushBack('u'); + return result; +} + + +// ---------------------------------------------------------------------------- +// Keyword Matcher + +#define KEYWORDS(KEYWORD_GROUP, KEYWORD) \ + KEYWORD_GROUP('b') \ + KEYWORD("break", Token::BREAK) \ + KEYWORD_GROUP('c') \ + KEYWORD("case", Token::CASE) \ + KEYWORD("catch", Token::CATCH) \ + KEYWORD("class", Token::FUTURE_RESERVED_WORD) \ + KEYWORD("const", Token::CONST) \ + KEYWORD("continue", Token::CONTINUE) \ + KEYWORD_GROUP('d') \ + KEYWORD("debugger", Token::DEBUGGER) \ + KEYWORD("default", Token::DEFAULT) \ + KEYWORD("delete", Token::DELETE) \ + KEYWORD("do", Token::DO) \ + KEYWORD_GROUP('e') \ + KEYWORD("else", Token::ELSE) \ + KEYWORD("enum", Token::FUTURE_RESERVED_WORD) \ + KEYWORD("export", harmony_modules \ + ? Token::EXPORT : Token::FUTURE_RESERVED_WORD) \ + KEYWORD("extends", Token::FUTURE_RESERVED_WORD) \ + KEYWORD_GROUP('f') \ + KEYWORD("false", Token::FALSE_LITERAL) \ + KEYWORD("finally", Token::FINALLY) \ + KEYWORD("for", Token::FOR) \ + KEYWORD("function", Token::FUNCTION) \ + KEYWORD_GROUP('i') \ + KEYWORD("if", Token::IF) \ + KEYWORD("implements", Token::FUTURE_STRICT_RESERVED_WORD) \ + KEYWORD("import", harmony_modules \ + ? Token::IMPORT : Token::FUTURE_RESERVED_WORD) \ + KEYWORD("in", Token::IN) \ + KEYWORD("instanceof", Token::INSTANCEOF) \ + KEYWORD("interface", Token::FUTURE_STRICT_RESERVED_WORD) \ + KEYWORD_GROUP('l') \ + KEYWORD("let", harmony_scoping \ + ? 
Token::LET : Token::FUTURE_STRICT_RESERVED_WORD) \ + KEYWORD_GROUP('n') \ + KEYWORD("new", Token::NEW) \ + KEYWORD("null", Token::NULL_LITERAL) \ + KEYWORD_GROUP('p') \ + KEYWORD("package", Token::FUTURE_STRICT_RESERVED_WORD) \ + KEYWORD("private", Token::FUTURE_STRICT_RESERVED_WORD) \ + KEYWORD("protected", Token::FUTURE_STRICT_RESERVED_WORD) \ + KEYWORD("public", Token::FUTURE_STRICT_RESERVED_WORD) \ + KEYWORD_GROUP('r') \ + KEYWORD("return", Token::RETURN) \ + KEYWORD_GROUP('s') \ + KEYWORD("static", Token::FUTURE_STRICT_RESERVED_WORD) \ + KEYWORD("super", Token::FUTURE_RESERVED_WORD) \ + KEYWORD("switch", Token::SWITCH) \ + KEYWORD_GROUP('t') \ + KEYWORD("this", Token::THIS) \ + KEYWORD("throw", Token::THROW) \ + KEYWORD("true", Token::TRUE_LITERAL) \ + KEYWORD("try", Token::TRY) \ + KEYWORD("typeof", Token::TYPEOF) \ + KEYWORD_GROUP('v') \ + KEYWORD("var", Token::VAR) \ + KEYWORD("void", Token::VOID) \ + KEYWORD_GROUP('w') \ + KEYWORD("while", Token::WHILE) \ + KEYWORD("with", Token::WITH) \ + KEYWORD_GROUP('y') \ + KEYWORD("yield", Token::FUTURE_STRICT_RESERVED_WORD) + + +static Token::Value KeywordOrIdentifierToken(const char* input, + int input_length, + bool harmony_scoping, + bool harmony_modules) { + ASSERT(input_length >= 1); + const int kMinLength = 2; + const int kMaxLength = 10; + if (input_length < kMinLength || input_length > kMaxLength) { + return Token::IDENTIFIER; + } + switch (input[0]) { + default: +#define KEYWORD_GROUP_CASE(ch) \ + break; \ + case ch: +#define KEYWORD(keyword, token) \ + { \ + /* 'keyword' is a char array, so sizeof(keyword) is */ \ + /* strlen(keyword) plus 1 for the NUL char. 
*/ \ + const int keyword_length = sizeof(keyword) - 1; \ + STATIC_ASSERT(keyword_length >= kMinLength); \ + STATIC_ASSERT(keyword_length <= kMaxLength); \ + if (input_length == keyword_length && \ + input[1] == keyword[1] && \ + (keyword_length <= 2 || input[2] == keyword[2]) && \ + (keyword_length <= 3 || input[3] == keyword[3]) && \ + (keyword_length <= 4 || input[4] == keyword[4]) && \ + (keyword_length <= 5 || input[5] == keyword[5]) && \ + (keyword_length <= 6 || input[6] == keyword[6]) && \ + (keyword_length <= 7 || input[7] == keyword[7]) && \ + (keyword_length <= 8 || input[8] == keyword[8]) && \ + (keyword_length <= 9 || input[9] == keyword[9])) { \ + return token; \ + } \ + } + KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD) + } + return Token::IDENTIFIER; +} + + +Token::Value Scanner::ScanIdentifierOrKeyword() { + ASSERT(unicode_cache_->IsIdentifierStart(c0_)); + LiteralScope literal(this); + // Scan identifier start character. + if (c0_ == '\\') { + uc32 c = ScanIdentifierUnicodeEscape(); + // Only allow legal identifier start characters. + if (c < 0 || + c == '\\' || // No recursive escapes. + !unicode_cache_->IsIdentifierStart(c)) { + return Token::ILLEGAL; + } + AddLiteralChar(c); + return ScanIdentifierSuffix(&literal); + } + + uc32 first_char = c0_; + Advance(); + AddLiteralChar(first_char); + + // Scan the rest of the identifier characters. + while (unicode_cache_->IsIdentifierPart(c0_)) { + if (c0_ != '\\') { + uc32 next_char = c0_; + Advance(); + AddLiteralChar(next_char); + continue; + } + // Fallthrough if no longer able to complete keyword. 
+ return ScanIdentifierSuffix(&literal); + } + + literal.Complete(); + + if (next_.literal_chars->is_ascii()) { + Vector chars = next_.literal_chars->ascii_literal(); + return KeywordOrIdentifierToken(chars.start(), + chars.length(), + harmony_scoping_, + harmony_modules_); + } + + return Token::IDENTIFIER; +} + + +Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal) { + // Scan the rest of the identifier characters. + while (unicode_cache_->IsIdentifierPart(c0_)) { + if (c0_ == '\\') { + uc32 c = ScanIdentifierUnicodeEscape(); + // Only allow legal identifier part characters. + if (c < 0 || + c == '\\' || + !unicode_cache_->IsIdentifierPart(c)) { + return Token::ILLEGAL; + } + AddLiteralChar(c); + } else { + AddLiteralChar(c0_); + Advance(); + } + } + literal->Complete(); + + return Token::IDENTIFIER; +} + + +bool Scanner::ScanRegExpPattern(bool seen_equal) { + // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags + bool in_character_class = false; + + // Previous token is either '/' or '/=', in the second case, the + // pattern starts at =. + next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1); + next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0); + + // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, + // the scanner should pass uninterpreted bodies to the RegExp + // constructor. + LiteralScope literal(this); + if (seen_equal) { + AddLiteralChar('='); + } + + while (c0_ != '/' || in_character_class) { + if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false; + if (c0_ == '\\') { // Escape sequence. + AddLiteralCharAdvance(); + if (unicode_cache_->IsLineTerminator(c0_) || c0_ < 0) return false; + AddLiteralCharAdvance(); + // If the escape allows more characters, i.e., \x??, \u????, or \c?, + // only "safe" characters are allowed (letters, digits, underscore), + // otherwise the escape isn't valid and the invalid character has + // its normal meaning. 
I.e., we can just continue scanning without + // worrying whether the following characters are part of the escape + // or not, since any '/', '\\' or '[' is guaranteed to not be part + // of the escape sequence. + + // TODO(896): At some point, parse RegExps more throughly to capture + // octal esacpes in strict mode. + } else { // Unescaped character. + if (c0_ == '[') in_character_class = true; + if (c0_ == ']') in_character_class = false; + AddLiteralCharAdvance(); + } + } + Advance(); // consume '/' + + literal.Complete(); + + return true; +} + + +bool Scanner::ScanLiteralUnicodeEscape() { + ASSERT(c0_ == '\\'); + uc32 chars_read[6] = {'\\', 'u', 0, 0, 0, 0}; + Advance(); + int i = 1; + if (c0_ == 'u') { + i++; + while (i < 6) { + Advance(); + if (!IsHexDigit(c0_)) break; + chars_read[i] = c0_; + i++; + } + } + if (i < 6) { + // Incomplete escape. Undo all advances and return false. + while (i > 0) { + i--; + PushBack(chars_read[i]); + } + return false; + } + // Complete escape. Add all chars to current literal buffer. + for (int i = 0; i < 6; i++) { + AddLiteralChar(chars_read[i]); + } + return true; +} + + +bool Scanner::ScanRegExpFlags() { + // Scan regular expression flags. + LiteralScope literal(this); + while (unicode_cache_->IsIdentifierPart(c0_)) { + if (c0_ != '\\') { + AddLiteralCharAdvance(); + } else { + if (!ScanLiteralUnicodeEscape()) { + break; + } + } + } + literal.Complete(); + + next_.location.end_pos = source_pos() - 1; + return true; +} + +} } // namespace v8::internal diff --git a/test/fixtures/cpp/scanner.h b/test/fixtures/cpp/scanner.h new file mode 100644 index 00000000..4de413b8 --- /dev/null +++ b/test/fixtures/cpp/scanner.h @@ -0,0 +1,576 @@ +// Copyright 2011 the V8 project authors. All rights reserved. 
+// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Features shared by parsing and pre-parsing scanners. + +#ifndef V8_SCANNER_H_ +#define V8_SCANNER_H_ + +#include "allocation.h" +#include "char-predicates.h" +#include "checks.h" +#include "globals.h" +#include "token.h" +#include "unicode-inl.h" +#include "utils.h" + +namespace v8 { +namespace internal { + + +// General collection of (multi-)bit-flags that can be passed to scanners and +// parsers to signify their (initial) mode of operation. 
+enum ParsingFlags { + kNoParsingFlags = 0, + // Embed LanguageMode values in parsing flags, i.e., equivalent to: + // CLASSIC_MODE = 0, + // STRICT_MODE, + // EXTENDED_MODE, + kLanguageModeMask = 0x03, + kAllowLazy = 0x04, + kAllowNativesSyntax = 0x08, + kAllowModules = 0x10 +}; + +STATIC_ASSERT((kLanguageModeMask & CLASSIC_MODE) == CLASSIC_MODE); +STATIC_ASSERT((kLanguageModeMask & STRICT_MODE) == STRICT_MODE); +STATIC_ASSERT((kLanguageModeMask & EXTENDED_MODE) == EXTENDED_MODE); + + +// Returns the value (0 .. 15) of a hexadecimal character c. +// If c is not a legal hexadecimal character, returns a value < 0. +inline int HexValue(uc32 c) { + c -= '0'; + if (static_cast(c) <= 9) return c; + c = (c | 0x20) - ('a' - '0'); // detect 0x11..0x16 and 0x31..0x36. + if (static_cast(c) <= 5) return c + 10; + return -1; +} + + +// --------------------------------------------------------------------- +// Buffered stream of UTF-16 code units, using an internal UTF-16 buffer. +// A code unit is a 16 bit value representing either a 16 bit code point +// or one part of a surrogate pair that make a single 21 bit code point. + +class Utf16CharacterStream { + public: + Utf16CharacterStream() : pos_(0) { } + virtual ~Utf16CharacterStream() { } + + // Returns and advances past the next UTF-16 code unit in the input + // stream. If there are no more code units, it returns a negative + // value. + inline uc32 Advance() { + if (buffer_cursor_ < buffer_end_ || ReadBlock()) { + pos_++; + return static_cast(*(buffer_cursor_++)); + } + // Note: currently the following increment is necessary to avoid a + // parser problem! The scanner treats the final kEndOfInput as + // a code unit with a position, and does math relative to that + // position. + pos_++; + + return kEndOfInput; + } + + // Return the current position in the code unit stream. + // Starts at zero. 
+ inline unsigned pos() const { return pos_; } + + // Skips forward past the next code_unit_count UTF-16 code units + // in the input, or until the end of input if that comes sooner. + // Returns the number of code units actually skipped. If less + // than code_unit_count, + inline unsigned SeekForward(unsigned code_unit_count) { + unsigned buffered_chars = + static_cast(buffer_end_ - buffer_cursor_); + if (code_unit_count <= buffered_chars) { + buffer_cursor_ += code_unit_count; + pos_ += code_unit_count; + return code_unit_count; + } + return SlowSeekForward(code_unit_count); + } + + // Pushes back the most recently read UTF-16 code unit (or negative + // value if at end of input), i.e., the value returned by the most recent + // call to Advance. + // Must not be used right after calling SeekForward. + virtual void PushBack(int32_t code_unit) = 0; + + protected: + static const uc32 kEndOfInput = -1; + + // Ensures that the buffer_cursor_ points to the code_unit at + // position pos_ of the input, if possible. If the position + // is at or after the end of the input, return false. If there + // are more code_units available, return true. + virtual bool ReadBlock() = 0; + virtual unsigned SlowSeekForward(unsigned code_unit_count) = 0; + + const uc16* buffer_cursor_; + const uc16* buffer_end_; + unsigned pos_; +}; + + +class UnicodeCache { +// --------------------------------------------------------------------- +// Caching predicates used by scanners. 
+ public: + UnicodeCache() {} + typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; + + StaticResource* utf8_decoder() { + return &utf8_decoder_; + } + + bool IsIdentifierStart(unibrow::uchar c) { return kIsIdentifierStart.get(c); } + bool IsIdentifierPart(unibrow::uchar c) { return kIsIdentifierPart.get(c); } + bool IsLineTerminator(unibrow::uchar c) { return kIsLineTerminator.get(c); } + bool IsWhiteSpace(unibrow::uchar c) { return kIsWhiteSpace.get(c); } + + private: + unibrow::Predicate kIsIdentifierStart; + unibrow::Predicate kIsIdentifierPart; + unibrow::Predicate kIsLineTerminator; + unibrow::Predicate kIsWhiteSpace; + StaticResource utf8_decoder_; + + DISALLOW_COPY_AND_ASSIGN(UnicodeCache); +}; + + +// ---------------------------------------------------------------------------- +// LiteralBuffer - Collector of chars of literals. + +class LiteralBuffer { + public: + LiteralBuffer() : is_ascii_(true), position_(0), backing_store_() { } + + ~LiteralBuffer() { + if (backing_store_.length() > 0) { + backing_store_.Dispose(); + } + } + + INLINE(void AddChar(uint32_t code_unit)) { + if (position_ >= backing_store_.length()) ExpandBuffer(); + if (is_ascii_) { + if (code_unit < kMaxAsciiCharCodeU) { + backing_store_[position_] = static_cast(code_unit); + position_ += kASCIISize; + return; + } + ConvertToUtf16(); + } + ASSERT(code_unit < 0x10000u); + *reinterpret_cast(&backing_store_[position_]) = code_unit; + position_ += kUC16Size; + } + + bool is_ascii() { return is_ascii_; } + + Vector utf16_literal() { + ASSERT(!is_ascii_); + ASSERT((position_ & 0x1) == 0); + return Vector( + reinterpret_cast(backing_store_.start()), + position_ >> 1); + } + + Vector ascii_literal() { + ASSERT(is_ascii_); + return Vector( + reinterpret_cast(backing_store_.start()), + position_); + } + + int length() { + return is_ascii_ ? 
position_ : (position_ >> 1); + } + + void Reset() { + position_ = 0; + is_ascii_ = true; + } + + private: + static const int kInitialCapacity = 16; + static const int kGrowthFactory = 4; + static const int kMinConversionSlack = 256; + static const int kMaxGrowth = 1 * MB; + inline int NewCapacity(int min_capacity) { + int capacity = Max(min_capacity, backing_store_.length()); + int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth); + return new_capacity; + } + + void ExpandBuffer() { + Vector new_store = Vector::New(NewCapacity(kInitialCapacity)); + memcpy(new_store.start(), backing_store_.start(), position_); + backing_store_.Dispose(); + backing_store_ = new_store; + } + + void ConvertToUtf16() { + ASSERT(is_ascii_); + Vector new_store; + int new_content_size = position_ * kUC16Size; + if (new_content_size >= backing_store_.length()) { + // Ensure room for all currently read code units as UC16 as well + // as the code unit about to be stored. + new_store = Vector::New(NewCapacity(new_content_size)); + } else { + new_store = backing_store_; + } + char* src = reinterpret_cast(backing_store_.start()); + uc16* dst = reinterpret_cast(new_store.start()); + for (int i = position_ - 1; i >= 0; i--) { + dst[i] = src[i]; + } + if (new_store.start() != backing_store_.start()) { + backing_store_.Dispose(); + backing_store_ = new_store; + } + position_ = new_content_size; + is_ascii_ = false; + } + + bool is_ascii_; + int position_; + Vector backing_store_; + + DISALLOW_COPY_AND_ASSIGN(LiteralBuffer); +}; + + +// ---------------------------------------------------------------------------- +// JavaScript Scanner. + +class Scanner { + public: + // Scoped helper for literal recording. Automatically drops the literal + // if aborting the scanning before it's complete. 
+ class LiteralScope { + public: + explicit LiteralScope(Scanner* self) + : scanner_(self), complete_(false) { + scanner_->StartLiteral(); + } + ~LiteralScope() { + if (!complete_) scanner_->DropLiteral(); + } + void Complete() { + scanner_->TerminateLiteral(); + complete_ = true; + } + + private: + Scanner* scanner_; + bool complete_; + }; + + // Representation of an interval of source positions. + struct Location { + Location(int b, int e) : beg_pos(b), end_pos(e) { } + Location() : beg_pos(0), end_pos(0) { } + + bool IsValid() const { + return beg_pos >= 0 && end_pos >= beg_pos; + } + + static Location invalid() { return Location(-1, -1); } + + int beg_pos; + int end_pos; + }; + + // -1 is outside of the range of any real source code. + static const int kNoOctalLocation = -1; + + typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; + + explicit Scanner(UnicodeCache* scanner_contants); + + void Initialize(Utf16CharacterStream* source); + + // Returns the next token and advances input. + Token::Value Next(); + // Returns the current token again. + Token::Value current_token() { return current_.token; } + // Returns the location information for the current token + // (the token last returned by Next()). + Location location() const { return current_.location; } + // Returns the literal string, if any, for the current token (the + // token last returned by Next()). The string is 0-terminated. + // Literal strings are collected for identifiers, strings, and + // numbers. + // These functions only give the correct result if the literal + // was scanned between calls to StartLiteral() and TerminateLiteral(). 
+ Vector literal_ascii_string() { + ASSERT_NOT_NULL(current_.literal_chars); + return current_.literal_chars->ascii_literal(); + } + Vector literal_utf16_string() { + ASSERT_NOT_NULL(current_.literal_chars); + return current_.literal_chars->utf16_literal(); + } + bool is_literal_ascii() { + ASSERT_NOT_NULL(current_.literal_chars); + return current_.literal_chars->is_ascii(); + } + int literal_length() const { + ASSERT_NOT_NULL(current_.literal_chars); + return current_.literal_chars->length(); + } + + bool literal_contains_escapes() const { + Location location = current_.location; + int source_length = (location.end_pos - location.beg_pos); + if (current_.token == Token::STRING) { + // Subtract delimiters. + source_length -= 2; + } + return current_.literal_chars->length() != source_length; + } + + // Similar functions for the upcoming token. + + // One token look-ahead (past the token returned by Next()). + Token::Value peek() const { return next_.token; } + + Location peek_location() const { return next_.location; } + + // Returns the literal string for the next token (the token that + // would be returned if Next() were called). + Vector next_literal_ascii_string() { + ASSERT_NOT_NULL(next_.literal_chars); + return next_.literal_chars->ascii_literal(); + } + Vector next_literal_utf16_string() { + ASSERT_NOT_NULL(next_.literal_chars); + return next_.literal_chars->utf16_literal(); + } + bool is_next_literal_ascii() { + ASSERT_NOT_NULL(next_.literal_chars); + return next_.literal_chars->is_ascii(); + } + int next_literal_length() const { + ASSERT_NOT_NULL(next_.literal_chars); + return next_.literal_chars->length(); + } + + UnicodeCache* unicode_cache() { return unicode_cache_; } + + static const int kCharacterLookaheadBufferSize = 1; + + // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. + uc32 ScanOctalEscape(uc32 c, int length); + + // Returns the location of the last seen octal literal. 
+ Location octal_position() const { return octal_pos_; } + void clear_octal_position() { octal_pos_ = Location::invalid(); } + + // Seek forward to the given position. This operation does not + // work in general, for instance when there are pushed back + // characters, but works for seeking forward until simple delimiter + // tokens, which is what it is used for. + void SeekForward(int pos); + + bool HarmonyScoping() const { + return harmony_scoping_; + } + void SetHarmonyScoping(bool scoping) { + harmony_scoping_ = scoping; + } + bool HarmonyModules() const { + return harmony_modules_; + } + void SetHarmonyModules(bool modules) { + harmony_modules_ = modules; + } + + + // Returns true if there was a line terminator before the peek'ed token, + // possibly inside a multi-line comment. + bool HasAnyLineTerminatorBeforeNext() const { + return has_line_terminator_before_next_ || + has_multiline_comment_before_next_; + } + + // Scans the input as a regular expression pattern, previous + // character(s) must be /(=). Returns true if a pattern is scanned. + bool ScanRegExpPattern(bool seen_equal); + // Returns true if regexp flags are scanned (always since flags can + // be empty). + bool ScanRegExpFlags(); + + // Tells whether the buffer contains an identifier (no escapes). + // Used for checking if a property name is an identifier. + static bool IsIdentifier(unibrow::CharacterStream* buffer); + + private: + // The current and look-ahead token. + struct TokenDesc { + Token::Value token; + Location location; + LiteralBuffer* literal_chars; + }; + + // Call this after setting source_ to the input. + void Init() { + // Set c0_ (one character ahead) + STATIC_ASSERT(kCharacterLookaheadBufferSize == 1); + Advance(); + // Initialize current_ to not refer to a literal. + current_.literal_chars = NULL; + } + + // Literal buffer support + inline void StartLiteral() { + LiteralBuffer* free_buffer = (current_.literal_chars == &literal_buffer1_) ? 
+ &literal_buffer2_ : &literal_buffer1_; + free_buffer->Reset(); + next_.literal_chars = free_buffer; + } + + INLINE(void AddLiteralChar(uc32 c)) { + ASSERT_NOT_NULL(next_.literal_chars); + next_.literal_chars->AddChar(c); + } + + // Complete scanning of a literal. + inline void TerminateLiteral() { + // Does nothing in the current implementation. + } + + // Stops scanning of a literal and drop the collected characters, + // e.g., due to an encountered error. + inline void DropLiteral() { + next_.literal_chars = NULL; + } + + inline void AddLiteralCharAdvance() { + AddLiteralChar(c0_); + Advance(); + } + + // Low-level scanning support. + void Advance() { c0_ = source_->Advance(); } + void PushBack(uc32 ch) { + source_->PushBack(c0_); + c0_ = ch; + } + + inline Token::Value Select(Token::Value tok) { + Advance(); + return tok; + } + + inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) { + Advance(); + if (c0_ == next) { + Advance(); + return then; + } else { + return else_; + } + } + + uc32 ScanHexNumber(int expected_length); + + // Scans a single JavaScript token. + void Scan(); + + bool SkipWhiteSpace(); + Token::Value SkipSingleLineComment(); + Token::Value SkipMultiLineComment(); + // Scans a possible HTML comment -- begins with 'pos() - kCharacterLookaheadBufferSize; + } + + UnicodeCache* unicode_cache_; + + // Buffers collecting literal strings, numbers, etc. + LiteralBuffer literal_buffer1_; + LiteralBuffer literal_buffer2_; + + TokenDesc current_; // desc for current token (as returned by Next()) + TokenDesc next_; // desc for next token (one token look-ahead) + + // Input stream. Must be initialized to an Utf16CharacterStream. + Utf16CharacterStream* source_; + + + // Start position of the octal literal last scanned. + Location octal_pos_; + + // One Unicode character look-ahead; c0_ < 0 at the end of the input. 
+ uc32 c0_; + + // Whether there is a line terminator whitespace character after + // the current token, and before the next. Does not count newlines + // inside multiline comments. + bool has_line_terminator_before_next_; + // Whether there is a multi-line comment that contains a + // line-terminator after the current token, and before the next. + bool has_multiline_comment_before_next_; + // Whether we scan 'let' as a keyword for harmony block-scoped let bindings. + bool harmony_scoping_; + // Whether we scan 'module', 'import', 'export' as keywords. + bool harmony_modules_; +}; + +} } // namespace v8::internal + +#endif // V8_SCANNER_H_ diff --git a/test/fixtures/cpp/utils.h b/test/fixtures/cpp/utils.h new file mode 100644 index 00000000..cbc993ef --- /dev/null +++ b/test/fixtures/cpp/utils.h @@ -0,0 +1,71 @@ +/* + This file is part of the PhantomJS project from Ofi Labs. + + Copyright (C) 2011 Ariya Hidayat + Copyright (C) 2011 Ivan De Marino + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. 
IN NO EVENT SHALL BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef UTILS_H +#define UTILS_H + +#include +#include +#include + +#include "csconverter.h" +#include "encoding.h" + +class QTemporaryFile; +/** + * Aggregate common utility functions. + * Functions are static methods. + * It's important to notice that, at the moment, this class can't be instantiated by design. + */ +class Utils +{ +public: + static void showUsage(); + static void messageHandler(QtMsgType type, const char *msg); + static bool exceptionHandler(const char* dump_path, const char* minidump_id, void* context, bool succeeded); + static QVariant coffee2js(const QString &script); + static bool injectJsInFrame(const QString &jsFilePath, const QString &libraryPath, QWebFrame *targetFrame, const bool startingScript = false); + static bool injectJsInFrame(const QString &jsFilePath, const Encoding &jsFileEnc, const QString &libraryPath, QWebFrame *targetFrame, const bool startingScript = false); + static QString readResourceFileUtf8(const QString &resourceFilePath); + + static bool loadJSForDebug(const QString &jsFilePath, const Encoding &jsFileEnc, const QString &libraryPath, QWebFrame *targetFrame, const bool autorun = false); + static bool loadJSForDebug(const QString &jsFilePath, const QString &libraryPath, QWebFrame *targetFrame, const bool autorun = false); + static void cleanupFromDebug(); + +private: + static QString findScript(const QString &jsFilePath, const QString& libraryPath); + static QString jsFromScriptFile(const QString& scriptPath, const Encoding& enc); 
+ Utils(); //< This class shouldn't be instantiated + + static QTemporaryFile* m_tempHarness; //< We want to make sure to clean up after ourselves + static QTemporaryFile* m_tempWrapper; +}; + +#endif // UTILS_H diff --git a/test/fixtures/cpp/v8.cc b/test/fixtures/cpp/v8.cc new file mode 100644 index 00000000..2910a070 --- /dev/null +++ b/test/fixtures/cpp/v8.cc @@ -0,0 +1,288 @@ +// Copyright 2012 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include "v8.h" + +#include "assembler.h" +#include "isolate.h" +#include "elements.h" +#include "bootstrapper.h" +#include "debug.h" +#include "deoptimizer.h" +#include "frames.h" +#include "heap-profiler.h" +#include "hydrogen.h" +#include "lithium-allocator.h" +#include "log.h" +#include "once.h" +#include "platform.h" +#include "runtime-profiler.h" +#include "serialize.h" +#include "store-buffer.h" + +namespace v8 { +namespace internal { + +V8_DECLARE_ONCE(init_once); + +bool V8::is_running_ = false; +bool V8::has_been_set_up_ = false; +bool V8::has_been_disposed_ = false; +bool V8::has_fatal_error_ = false; +bool V8::use_crankshaft_ = true; +List* V8::call_completed_callbacks_ = NULL; + +static LazyMutex entropy_mutex = LAZY_MUTEX_INITIALIZER; + +static EntropySource entropy_source; + + +bool V8::Initialize(Deserializer* des) { + FlagList::EnforceFlagImplications(); + + InitializeOncePerProcess(); + + // The current thread may not yet had entered an isolate to run. + // Note the Isolate::Current() may be non-null because for various + // initialization purposes an initializing thread may be assigned an isolate + // but not actually enter it. 
+ if (i::Isolate::CurrentPerIsolateThreadData() == NULL) { + i::Isolate::EnterDefaultIsolate(); + } + + ASSERT(i::Isolate::CurrentPerIsolateThreadData() != NULL); + ASSERT(i::Isolate::CurrentPerIsolateThreadData()->thread_id().Equals( + i::ThreadId::Current())); + ASSERT(i::Isolate::CurrentPerIsolateThreadData()->isolate() == + i::Isolate::Current()); + + if (IsDead()) return false; + + Isolate* isolate = Isolate::Current(); + if (isolate->IsInitialized()) return true; + + is_running_ = true; + has_been_set_up_ = true; + has_fatal_error_ = false; + has_been_disposed_ = false; + + return isolate->Init(des); +} + + +void V8::SetFatalError() { + is_running_ = false; + has_fatal_error_ = true; +} + + +void V8::TearDown() { + Isolate* isolate = Isolate::Current(); + ASSERT(isolate->IsDefaultIsolate()); + + if (!has_been_set_up_ || has_been_disposed_) return; + + ElementsAccessor::TearDown(); + LOperand::TearDownCaches(); + RegisteredExtension::UnregisterAll(); + + isolate->TearDown(); + delete isolate; + + is_running_ = false; + has_been_disposed_ = true; + + delete call_completed_callbacks_; + call_completed_callbacks_ = NULL; + + OS::TearDown(); +} + + +static void seed_random(uint32_t* state) { + for (int i = 0; i < 2; ++i) { + if (FLAG_random_seed != 0) { + state[i] = FLAG_random_seed; + } else if (entropy_source != NULL) { + uint32_t val; + ScopedLock lock(entropy_mutex.Pointer()); + entropy_source(reinterpret_cast(&val), sizeof(uint32_t)); + state[i] = val; + } else { + state[i] = random(); + } + } +} + + +// Random number generator using George Marsaglia's MWC algorithm. +static uint32_t random_base(uint32_t* state) { + // Initialize seed using the system random(). + // No non-zero seed will ever become zero again. + if (state[0] == 0) seed_random(state); + + // Mix the bits. Never replaces state[i] with 0 if it is nonzero. 
+ state[0] = 18273 * (state[0] & 0xFFFF) + (state[0] >> 16); + state[1] = 36969 * (state[1] & 0xFFFF) + (state[1] >> 16); + + return (state[0] << 14) + (state[1] & 0x3FFFF); +} + + +void V8::SetEntropySource(EntropySource source) { + entropy_source = source; +} + + +void V8::SetReturnAddressLocationResolver( + ReturnAddressLocationResolver resolver) { + StackFrame::SetReturnAddressLocationResolver(resolver); +} + + +// Used by JavaScript APIs +uint32_t V8::Random(Context* context) { + ASSERT(context->IsGlobalContext()); + ByteArray* seed = context->random_seed(); + return random_base(reinterpret_cast(seed->GetDataStartAddress())); +} + + +// Used internally by the JIT and memory allocator for security +// purposes. So, we keep a different state to prevent informations +// leaks that could be used in an exploit. +uint32_t V8::RandomPrivate(Isolate* isolate) { + ASSERT(isolate == Isolate::Current()); + return random_base(isolate->private_random_seed()); +} + + +bool V8::IdleNotification(int hint) { + // Returning true tells the caller that there is no need to call + // IdleNotification again. + if (!FLAG_use_idle_notification) return true; + + // Tell the heap that it may want to adjust. + return HEAP->IdleNotification(hint); +} + + +void V8::AddCallCompletedCallback(CallCompletedCallback callback) { + if (call_completed_callbacks_ == NULL) { // Lazy init. 
+ call_completed_callbacks_ = new List(); + } + for (int i = 0; i < call_completed_callbacks_->length(); i++) { + if (callback == call_completed_callbacks_->at(i)) return; + } + call_completed_callbacks_->Add(callback); +} + + +void V8::RemoveCallCompletedCallback(CallCompletedCallback callback) { + if (call_completed_callbacks_ == NULL) return; + for (int i = 0; i < call_completed_callbacks_->length(); i++) { + if (callback == call_completed_callbacks_->at(i)) { + call_completed_callbacks_->Remove(i); + } + } +} + + +void V8::FireCallCompletedCallback(Isolate* isolate) { + if (call_completed_callbacks_ == NULL) return; + HandleScopeImplementer* handle_scope_implementer = + isolate->handle_scope_implementer(); + if (!handle_scope_implementer->CallDepthIsZero()) return; + // Fire callbacks. Increase call depth to prevent recursive callbacks. + handle_scope_implementer->IncrementCallDepth(); + for (int i = 0; i < call_completed_callbacks_->length(); i++) { + call_completed_callbacks_->at(i)(); + } + handle_scope_implementer->DecrementCallDepth(); +} + + +// Use a union type to avoid type-aliasing optimizations in GCC. +typedef union { + double double_value; + uint64_t uint64_t_value; +} double_int_union; + + +Object* V8::FillHeapNumberWithRandom(Object* heap_number, + Context* context) { + double_int_union r; + uint64_t random_bits = Random(context); + // Convert 32 random bits to 0.(32 random bits) in a double + // by computing: + // ( 1.(20 0s)(32 random bits) x 2^20 ) - (1.0 x 2^20)). 
+ static const double binary_million = 1048576.0; + r.double_value = binary_million; + r.uint64_t_value |= random_bits; + r.double_value -= binary_million; + + HeapNumber::cast(heap_number)->set_value(r.double_value); + return heap_number; +} + +void V8::InitializeOncePerProcessImpl() { + OS::SetUp(); + + use_crankshaft_ = FLAG_crankshaft; + + if (Serializer::enabled()) { + use_crankshaft_ = false; + } + + CPU::SetUp(); + if (!CPU::SupportsCrankshaft()) { + use_crankshaft_ = false; + } + + OS::PostSetUp(); + + RuntimeProfiler::GlobalSetUp(); + + ElementsAccessor::InitializeOncePerProcess(); + + if (FLAG_stress_compaction) { + FLAG_force_marking_deque_overflows = true; + FLAG_gc_global = true; + FLAG_max_new_space_size = (1 << (kPageSizeBits - 10)) * 2; + } + + LOperand::SetUpCaches(); + SetUpJSCallerSavedCodeData(); + SamplerRegistry::SetUp(); + ExternalReference::SetUp(); +} + +void V8::InitializeOncePerProcess() { + CallOnce(&init_once, &InitializeOncePerProcessImpl); +} + +} } // namespace v8::internal diff --git a/test/fixtures/cpp/v8.h b/test/fixtures/cpp/v8.h new file mode 100644 index 00000000..67716d81 --- /dev/null +++ b/test/fixtures/cpp/v8.h @@ -0,0 +1,152 @@ +// Copyright 2011 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// +// Top include for all V8 .cc files. +// + +#ifndef V8_V8_H_ +#define V8_V8_H_ + +#if defined(GOOGLE3) +// Google3 special flag handling. +#if defined(DEBUG) && defined(NDEBUG) +// V8 only uses DEBUG and whenever it is set we are building a debug +// version of V8. We do not use NDEBUG and simply undef it here for +// consistency. +#undef NDEBUG +#endif +#endif // defined(GOOGLE3) + +// V8 only uses DEBUG, but included external files +// may use NDEBUG - make sure they are consistent. +#if defined(DEBUG) && defined(NDEBUG) +#error both DEBUG and NDEBUG are set +#endif + +// Basic includes +#include "../include/v8.h" +#include "v8globals.h" +#include "v8checks.h" +#include "allocation.h" +#include "v8utils.h" +#include "flags.h" + +// Objects & heap +#include "objects-inl.h" +#include "spaces-inl.h" +#include "heap-inl.h" +#include "incremental-marking-inl.h" +#include "mark-compact-inl.h" +#include "log-inl.h" +#include "cpu-profiler-inl.h" +#include "handles-inl.h" +#include "zone-inl.h" + +namespace v8 { +namespace internal { + +class Deserializer; + +class V8 : public AllStatic { + public: + // Global actions. 
+ + // If Initialize is called with des == NULL, the initial state is + // created from scratch. If a non-null Deserializer is given, the + // initial state is created by reading the deserialized data into an + // empty heap. + static bool Initialize(Deserializer* des); + static void TearDown(); + static bool IsRunning() { return is_running_; } + static bool UseCrankshaft() { return use_crankshaft_; } + // To be dead you have to have lived + // TODO(isolates): move IsDead to Isolate. + static bool IsDead() { return has_fatal_error_ || has_been_disposed_; } + static void SetFatalError(); + + // Report process out of memory. Implementation found in api.cc. + static void FatalProcessOutOfMemory(const char* location, + bool take_snapshot = false); + + // Allows an entropy source to be provided for use in random number + // generation. + static void SetEntropySource(EntropySource source); + // Support for return-address rewriting profilers. + static void SetReturnAddressLocationResolver( + ReturnAddressLocationResolver resolver); + // Random number generation support. Not cryptographically safe. + static uint32_t Random(Context* context); + // We use random numbers internally in memory allocation and in the + // compilers for security. In order to prevent information leaks we + // use a separate random state for internal random number + // generation. + static uint32_t RandomPrivate(Isolate* isolate); + static Object* FillHeapNumberWithRandom(Object* heap_number, + Context* context); + + // Idle notification directly from the API. 
+ static bool IdleNotification(int hint); + + static void AddCallCompletedCallback(CallCompletedCallback callback); + static void RemoveCallCompletedCallback(CallCompletedCallback callback); + static void FireCallCompletedCallback(Isolate* isolate); + + private: + static void InitializeOncePerProcessImpl(); + static void InitializeOncePerProcess(); + + // True if engine is currently running + static bool is_running_; + // True if V8 has ever been run + static bool has_been_set_up_; + // True if error has been signaled for current engine + // (reset to false if engine is restarted) + static bool has_fatal_error_; + // True if engine has been shut down + // (reset if engine is restarted) + static bool has_been_disposed_; + // True if we are using the crankshaft optimizing compiler. + static bool use_crankshaft_; + // List of callbacks when a Call completes. + static List* call_completed_callbacks_; +}; + + +// JavaScript defines two kinds of 'nil'. +enum NilValue { kNullValue, kUndefinedValue }; + + +// JavaScript defines two kinds of equality. 
+enum EqualityKind { kStrictEquality, kNonStrictEquality }; + + +} } // namespace v8::internal + +namespace i = v8::internal; + +#endif // V8_V8_H_ diff --git a/test/fixtures/java/HtmlDomParserContext.java b/test/fixtures/java/HtmlDomParserContext.java new file mode 100644 index 00000000..6c13e3d4 --- /dev/null +++ b/test/fixtures/java/HtmlDomParserContext.java @@ -0,0 +1,243 @@ +/** + * (The MIT License) + * + * Copyright (c) 2008 - 2012: + * + * * {Aaron Patterson}[http://tenderlovemaking.com] + * * {Mike Dalessio}[http://mike.daless.io] + * * {Charles Nutter}[http://blog.headius.com] + * * {Sergio Arbeo}[http://www.serabe.com] + * * {Patrick Mahoney}[http://polycrystal.org] + * * {Yoko Harada}[http://yokolet.blogspot.com] + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * 'Software'), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package nokogiri.internals; + +import static nokogiri.internals.NokogiriHelpers.getNokogiriClass; +import static nokogiri.internals.NokogiriHelpers.isNamespace; +import static nokogiri.internals.NokogiriHelpers.stringOrNil; +import nokogiri.HtmlDocument; +import nokogiri.NokogiriService; +import nokogiri.XmlDocument; + +import org.apache.xerces.parsers.DOMParser; +import org.apache.xerces.xni.Augmentations; +import org.apache.xerces.xni.QName; +import org.apache.xerces.xni.XMLAttributes; +import org.apache.xerces.xni.XNIException; +import org.apache.xerces.xni.parser.XMLDocumentFilter; +import org.apache.xerces.xni.parser.XMLParserConfiguration; +import org.cyberneko.html.HTMLConfiguration; +import org.cyberneko.html.filters.DefaultFilter; +import org.jruby.Ruby; +import org.jruby.RubyClass; +import org.jruby.runtime.ThreadContext; +import org.jruby.runtime.builtin.IRubyObject; +import org.w3c.dom.Document; +import org.w3c.dom.NamedNodeMap; +import org.w3c.dom.NodeList; + +/** + * Parser for HtmlDocument. This class actually parses HtmlDocument using NekoHtml. 
+ * + * @author sergio + * @author Patrick Mahoney + * @author Yoko Harada + */ +public class HtmlDomParserContext extends XmlDomParserContext { + + public HtmlDomParserContext(Ruby runtime, IRubyObject options) { + super(runtime, options); + } + + public HtmlDomParserContext(Ruby runtime, IRubyObject encoding, IRubyObject options) { + super(runtime, encoding, options); + } + + @Override + protected void initErrorHandler() { + if (options.strict) { + errorHandler = new NokogiriStrictErrorHandler(options.noError, options.noWarning); + } else { + errorHandler = new NokogiriNonStrictErrorHandler4NekoHtml(options.noError, options.noWarning); + } + } + + @Override + protected void initParser(Ruby runtime) { + XMLParserConfiguration config = new HTMLConfiguration(); + XMLDocumentFilter removeNSAttrsFilter = new RemoveNSAttrsFilter(); + XMLDocumentFilter elementValidityCheckFilter = new ElementValidityCheckFilter(errorHandler); + //XMLDocumentFilter[] filters = { removeNSAttrsFilter, elementValidityCheckFilter}; + XMLDocumentFilter[] filters = { elementValidityCheckFilter}; + + config.setErrorHandler(this.errorHandler); + parser = new DOMParser(config); + + // see http://nekohtml.sourceforge.net/settings.html for details + setProperty("http://cyberneko.org/html/properties/default-encoding", java_encoding); + setProperty("http://cyberneko.org/html/properties/names/elems", "lower"); + setProperty("http://cyberneko.org/html/properties/names/attrs", "lower"); + setProperty("http://cyberneko.org/html/properties/filters", filters); + setFeature("http://cyberneko.org/html/features/report-errors", true); + setFeature("http://xml.org/sax/features/namespaces", false); + setFeature("http://cyberneko.org/html/features/insert-doctype", true); + } + + /** + * Enable NekoHTML feature for balancing tags in a document fragment. + * + * This method is used in XmlNode#in_context method. 
+ */ + public void enableDocumentFragment() { + setFeature("http://cyberneko.org/html/features/balance-tags/document-fragment", true); + } + + @Override + protected XmlDocument getNewEmptyDocument(ThreadContext context) { + IRubyObject[] args = new IRubyObject[0]; + return (XmlDocument) XmlDocument.rbNew(context, getNokogiriClass(context.getRuntime(), "Nokogiri::HTML::Document"), args); + } + + @Override + protected XmlDocument wrapDocument(ThreadContext context, + RubyClass klazz, + Document document) { + HtmlDocument htmlDocument = (HtmlDocument) NokogiriService.HTML_DOCUMENT_ALLOCATOR.allocate(context.getRuntime(), klazz); + htmlDocument.setDocumentNode(context, document); + if (ruby_encoding.isNil()) { + // ruby_encoding might have detected by HtmlDocument::EncodingReader + if (detected_encoding != null && !detected_encoding.isNil()) { + ruby_encoding = detected_encoding; + } else { + // no encoding given & no encoding detected, then try to get it + String charset = tryGetCharsetFromHtml5MetaTag(document); + ruby_encoding = stringOrNil(context.getRuntime(), charset); + } + } + htmlDocument.setEncoding(ruby_encoding); + htmlDocument.setParsedEncoding(java_encoding); + return htmlDocument; + } + + // NekoHtml doesn't understand HTML5 meta tag format. This fails to detect charset + // from an HTML5 style meta tag. Luckily, the meta tag and charset exists in DOM tree + // so, this method attempts to find the charset. 
+ private String tryGetCharsetFromHtml5MetaTag(Document document) { + if (!"html".equalsIgnoreCase(document.getDocumentElement().getNodeName())) return null; + NodeList list = document.getDocumentElement().getChildNodes(); + for (int i = 0; i < list.getLength(); i++) { + if ("head".equalsIgnoreCase(list.item(i).getNodeName())) { + NodeList headers = list.item(i).getChildNodes(); + for (int j = 0; j < headers.getLength(); j++) { + if ("meta".equalsIgnoreCase(headers.item(j).getNodeName())) { + NamedNodeMap nodeMap = headers.item(j).getAttributes(); + for (int k = 0; k < nodeMap.getLength(); k++) { + if ("charset".equalsIgnoreCase(nodeMap.item(k).getNodeName())) { + return nodeMap.item(k).getNodeValue(); + } + } + } + } + } + } + return null; + } + + /** + * Filter to strip out attributes that pertain to XML namespaces. + */ + public static class RemoveNSAttrsFilter extends DefaultFilter { + @Override + public void startElement(QName element, XMLAttributes attrs, + Augmentations augs) throws XNIException { + int i; + for (i = 0; i < attrs.getLength(); ++i) { + if (isNamespace(attrs.getQName(i))) { + attrs.removeAttributeAt(i); + --i; + } + } + + element.uri = null; + super.startElement(element, attrs, augs); + } + } + + public static class ElementValidityCheckFilter extends DefaultFilter { + private NokogiriErrorHandler errorHandler; + + private ElementValidityCheckFilter(NokogiriErrorHandler errorHandler) { + this.errorHandler = errorHandler; + } + + // element names from xhtml1-strict.dtd + private static String[][] element_names = { + {"a", "abbr", "acronym", "address", "area"}, + {"b", "base", "basefont", "bdo", "big", "blockquote", "body", "br", "button"}, + {"caption", "cite", "code", "col", "colgroup"}, + {"dd", "del", "dfn", "div", "dl", "dt"}, + {"em"}, + {"fieldset", "font", "form", "frame", "frameset"}, + {}, // g + {"h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html"}, + {"i", "iframe", "img", "input", "ins"}, + {}, // j + {"kbd"}, + {"label", 
"legend", "li", "link"}, + {"map", "meta"}, + {"noframes", "noscript"}, + {"object", "ol", "optgroup", "option"}, + {"p", "param", "pre"}, + {"q"}, + {}, // r + {"s", "samp", "script", "select", "small", "span", "strike", "strong", "style", "sub", "sup"}, + {"table", "tbody", "td", "textarea", "tfoot", "th", "thead", "title", "tr", "tt"}, + {"u", "ul"}, + {"var"}, + {}, // w + {}, // x + {}, // y + {} // z + }; + + private boolean isValid(String testee) { + char[] c = testee.toCharArray(); + int index = new Integer(c[0]) - 97; + if (index > 25) return false; + for (int i=0; i itemListeners = ExtensionListView.createCopyOnWriteList(ItemListener.class); + + /** + * List of registered {@link hudson.slaves.ComputerListener}s. + * @deprecated as of 1.286 + */ + private transient final CopyOnWriteList computerListeners = ExtensionListView.createCopyOnWriteList(ComputerListener.class); + + + @CLIResolver + public static Hudson getInstance() { + return (Hudson)Jenkins.getInstance(); + } + + public Hudson(File root, ServletContext context) throws IOException, InterruptedException, ReactorException { + this(root,context,null); + } + + public Hudson(File root, ServletContext context, PluginManager pluginManager) throws IOException, InterruptedException, ReactorException { + super(root, context, pluginManager); + } + + /** + * Gets all the installed {@link ItemListener}s. + * + * @deprecated as of 1.286. + * Use {@link ItemListener#all()}. + */ + public CopyOnWriteList getJobListeners() { + return itemListeners; + } + + /** + * Gets all the installed {@link ComputerListener}s. + * + * @deprecated as of 1.286. + * Use {@link ComputerListener#all()}. + */ + public CopyOnWriteList getComputerListeners() { + return computerListeners; + } + + /** + * Gets the slave node of the give name, hooked under this Hudson. + * + * @deprecated + * Use {@link #getNode(String)}. Since 1.252. 
+ */ + public Slave getSlave(String name) { + Node n = getNode(name); + if (n instanceof Slave) + return (Slave)n; + return null; + } + + /** + * @deprecated + * Use {@link #getNodes()}. Since 1.252. + */ + public List getSlaves() { + return (List)slaves; + } + + /** + * Updates the slave list. + * + * @deprecated + * Use {@link #setNodes(List)}. Since 1.252. + */ + public void setSlaves(List slaves) throws IOException { + setNodes(slaves); + } + + /** + * @deprecated + * Left only for the compatibility of URLs. + * Should not be invoked for any other purpose. + */ + public TopLevelItem getJob(String name) { + return getItem(name); + } + + /** + * @deprecated + * Used only for mapping jobs to URL in a case-insensitive fashion. + */ + public TopLevelItem getJobCaseInsensitive(String name) { + String match = Functions.toEmailSafeString(name); + for(TopLevelItem item : getItems()) { + if(Functions.toEmailSafeString(item.getName()).equalsIgnoreCase(match)) { + return item; + } + } + return null; + } + + /** + * @deprecated as of 1.317 + * Use {@link #doQuietDown()} instead. + */ + public synchronized void doQuietDown(StaplerResponse rsp) throws IOException, ServletException { + doQuietDown().generateResponse(null, rsp, this); + } + + /** + * RSS feed for log entries. + * + * @deprecated + * As on 1.267, moved to "/log/rss..." + */ + public void doLogRss( StaplerRequest req, StaplerResponse rsp ) throws IOException, ServletException { + String qs = req.getQueryString(); + rsp.sendRedirect2("./log/rss"+(qs==null?"":'?'+qs)); + } + + /** + * @deprecated as of 1.294 + * Define your own check method, instead of relying on this generic one. 
+ */ + public void doFieldCheck(StaplerRequest req, StaplerResponse rsp) throws IOException, ServletException { + doFieldCheck( + fixEmpty(req.getParameter("value")), + fixEmpty(req.getParameter("type")), + fixEmpty(req.getParameter("errorText")), + fixEmpty(req.getParameter("warningText"))).generateResponse(req,rsp,this); + } + + /** + * Checks if the value for a field is set; if not an error or warning text is displayed. + * If the parameter "value" is not set then the parameter "errorText" is displayed + * as an error text. If the parameter "errorText" is not set, then the parameter "warningText" + * is displayed as a warning text. + *

+ * If the text is set and the parameter "type" is set, it will validate that the value is of the + * correct type. Supported types are "number, "number-positive" and "number-negative". + * + * @deprecated as of 1.324 + * Either use client-side validation (e.g. class="required number") + * or define your own check method, instead of relying on this generic one. + */ + public FormValidation doFieldCheck(@QueryParameter(fixEmpty=true) String value, + @QueryParameter(fixEmpty=true) String type, + @QueryParameter(fixEmpty=true) String errorText, + @QueryParameter(fixEmpty=true) String warningText) { + if (value == null) { + if (errorText != null) + return FormValidation.error(errorText); + if (warningText != null) + return FormValidation.warning(warningText); + return FormValidation.error("No error or warning text was set for fieldCheck()."); + } + + if (type != null) { + try { + if (type.equalsIgnoreCase("number")) { + NumberFormat.getInstance().parse(value); + } else if (type.equalsIgnoreCase("number-positive")) { + if (NumberFormat.getInstance().parse(value).floatValue() <= 0) + return FormValidation.error(Messages.Hudson_NotAPositiveNumber()); + } else if (type.equalsIgnoreCase("number-negative")) { + if (NumberFormat.getInstance().parse(value).floatValue() >= 0) + return FormValidation.error(Messages.Hudson_NotANegativeNumber()); + } + } catch (ParseException e) { + return FormValidation.error(Messages.Hudson_NotANumber()); + } + } + + return FormValidation.ok(); + } + + /** + * @deprecated + * Use {@link Functions#isWindows()}. + */ + public static boolean isWindows() { + return File.pathSeparatorChar==';'; + } + + /** + * @deprecated + * Use {@link hudson.Platform#isDarwin()} + */ + public static boolean isDarwin() { + return Platform.isDarwin(); + } + + /** + * @deprecated since 2007-12-18. 
+ * Use {@link #checkPermission(hudson.security.Permission)} + */ + public static boolean adminCheck() throws IOException { + return adminCheck(Stapler.getCurrentRequest(), Stapler.getCurrentResponse()); + } + + /** + * @deprecated since 2007-12-18. + * Use {@link #checkPermission(hudson.security.Permission)} + */ + public static boolean adminCheck(StaplerRequest req,StaplerResponse rsp) throws IOException { + if (isAdmin(req)) return true; + + rsp.sendError(StaplerResponse.SC_FORBIDDEN); + return false; + } + + /** + * Checks if the current user (for which we are processing the current request) + * has the admin access. + * + * @deprecated since 2007-12-18. + * This method is deprecated when Hudson moved from simple Unix root-like model + * of "admin gets to do everything, and others don't have any privilege" to more + * complex {@link hudson.security.ACL} and {@link hudson.security.Permission} based scheme. + * + *

+ * For a quick migration, use {@code Hudson.getInstance().getACL().hasPermission(Hudson.ADMINISTER)} + * To check if the user has the 'administer' role in Hudson. + * + *

+ * But ideally, your plugin should first identify a suitable {@link hudson.security.Permission} (or create one, + * if appropriate), then identify a suitable {@link hudson.security.AccessControlled} object to check its permission + * against. + */ + public static boolean isAdmin() { + return Jenkins.getInstance().getACL().hasPermission(ADMINISTER); + } + + /** + * @deprecated since 2007-12-18. + * Define a custom {@link hudson.security.Permission} and check against ACL. + * See {@link #isAdmin()} for more instructions. + */ + public static boolean isAdmin(StaplerRequest req) { + return isAdmin(); + } + + static { + XSTREAM.alias("hudson",Hudson.class); + } + + /** + * @deprecated only here for backward comp + */ + public static final class MasterComputer extends Jenkins.MasterComputer { + // no op + } + + /** + * @deprecated only here for backward comp + */ + public static class CloudList extends Jenkins.CloudList { + public CloudList(Jenkins h) { + super(h); + } + + public CloudList() {// needed for XStream deserialization + super(); + } + } +} diff --git a/test/fixtures/java/NokogiriService.java b/test/fixtures/java/NokogiriService.java new file mode 100644 index 00000000..b497675e --- /dev/null +++ b/test/fixtures/java/NokogiriService.java @@ -0,0 +1,598 @@ +/** + * (The MIT License) + * + * Copyright (c) 2008 - 2011: + * + * * {Aaron Patterson}[http://tenderlovemaking.com] + * * {Mike Dalessio}[http://mike.daless.io] + * * {Charles Nutter}[http://blog.headius.com] + * * {Sergio Arbeo}[http://www.serabe.com] + * * {Patrick Mahoney}[http://polycrystal.org] + * * {Yoko Harada}[http://yokolet.blogspot.com] + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * 'Software'), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * 
permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +package nokogiri; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import org.jruby.Ruby; +import org.jruby.RubyArray; +import org.jruby.RubyClass; +import org.jruby.RubyFixnum; +import org.jruby.RubyModule; +import org.jruby.runtime.ObjectAllocator; +import org.jruby.runtime.builtin.IRubyObject; +import org.jruby.runtime.load.BasicLibraryService; + +/** + * Class to provide Nokogiri. This class is used to make "require 'nokogiri'" work + * in JRuby. Also, this class holds a Ruby type cache and allocators of Ruby types. 
+ * + * @author headius + * @author Yoko Harada + */ +public class NokogiriService implements BasicLibraryService { + public static final String nokogiriClassCacheGvarName = "$NOKOGIRI_CLASS_CACHE"; + public static Map nokogiriClassCache; + + public boolean basicLoad(Ruby ruby) { + init(ruby); + createNokogiriClassCahce(ruby); + return true; + } + + private static void createNokogiriClassCahce(Ruby ruby) { + nokogiriClassCache = Collections.synchronizedMap(new HashMap()); + nokogiriClassCache.put("Nokogiri::EncodingHandler", (RubyClass)ruby.getClassFromPath("Nokogiri::EncodingHandler")); + nokogiriClassCache.put("Nokogiri::HTML::Document", (RubyClass)ruby.getClassFromPath("Nokogiri::HTML::Document")); + nokogiriClassCache.put("Nokogiri::HTML::ElementDescription", (RubyClass)ruby.getClassFromPath("Nokogiri::HTML::ElementDescription")); + nokogiriClassCache.put("Nokogiri::XML::Attr", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Attr")); + nokogiriClassCache.put("Nokogiri::XML::Document", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Document")); + nokogiriClassCache.put("Nokogiri::XML::DocumentFragment", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::DocumentFragment")); + nokogiriClassCache.put("Nokogiri::XML::DTD", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::DTD")); + nokogiriClassCache.put("Nokogiri::XML::Text", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Text")); + nokogiriClassCache.put("Nokogiri::XML::Comment", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Comment")); + nokogiriClassCache.put("Nokogiri::XML::Element", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Element")); + nokogiriClassCache.put("Nokogiri::XML::ElementContent", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::ElementContent")); + nokogiriClassCache.put("Nokogiri::XML::ElementDecl", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::ElementDecl")); + nokogiriClassCache.put("Nokogiri::XML::EntityDecl", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::EntityDecl")); + 
nokogiriClassCache.put("Nokogiri::XML::EntityReference", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::EntityReference")); + nokogiriClassCache.put("Nokogiri::XML::ProcessingInstruction", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::ProcessingInstruction")); + nokogiriClassCache.put("Nokogiri::XML::CDATA", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::CDATA")); + nokogiriClassCache.put("Nokogiri::XML::Node", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Node")); + nokogiriClassCache.put("Nokogiri::XML::NodeSet", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::NodeSet")); + nokogiriClassCache.put("Nokogiri::XML::Namespace", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Namespace")); + nokogiriClassCache.put("Nokogiri::XML::SyntaxError", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::SyntaxError")); + nokogiriClassCache.put("Nokogiri::XML::Reader", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Reader")); + nokogiriClassCache.put("Nokogiri::XML::RelaxNG", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::RelaxNG")); + nokogiriClassCache.put("Nokogiri::XML::Schema", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::Schema")); + nokogiriClassCache.put("Nokogiri::XML::XPathContext", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::XPathContext")); + nokogiriClassCache.put("Nokogiri::XML::AttributeDecl", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::AttributeDecl")); + nokogiriClassCache.put("Nokogiri::XML::SAX::ParserContext", (RubyClass)ruby.getClassFromPath("Nokogiri::XML::SAX::ParserContext")); + } + + private void init(Ruby ruby) { + RubyModule nokogiri = ruby.defineModule("Nokogiri"); + RubyModule xmlModule = nokogiri.defineModuleUnder("XML"); + RubyModule xmlSaxModule = xmlModule.defineModuleUnder("SAX"); + RubyModule htmlModule = nokogiri.defineModuleUnder("HTML"); + RubyModule htmlSaxModule = htmlModule.defineModuleUnder("SAX"); + RubyModule xsltModule = nokogiri.defineModuleUnder("XSLT"); + + createNokogiriModule(ruby, nokogiri); + 
createSyntaxErrors(ruby, nokogiri, xmlModule); + RubyClass xmlNode = createXmlModule(ruby, xmlModule); + createHtmlModule(ruby, htmlModule); + createDocuments(ruby, xmlModule, htmlModule, xmlNode); + createSaxModule(ruby, xmlSaxModule, htmlSaxModule); + createXsltModule(ruby, xsltModule); + } + + private void createNokogiriModule(Ruby ruby, RubyModule nokogiri) {; + RubyClass encHandler = nokogiri.defineClassUnder("EncodingHandler", ruby.getObject(), ENCODING_HANDLER_ALLOCATOR); + encHandler.defineAnnotatedMethods(EncodingHandler.class); + } + + private void createSyntaxErrors(Ruby ruby, RubyModule nokogiri, RubyModule xmlModule) { + RubyClass syntaxError = nokogiri.defineClassUnder("SyntaxError", ruby.getStandardError(), ruby.getStandardError().getAllocator()); + RubyClass xmlSyntaxError = xmlModule.defineClassUnder("SyntaxError", syntaxError, XML_SYNTAXERROR_ALLOCATOR); + xmlSyntaxError.defineAnnotatedMethods(XmlSyntaxError.class); + } + + private RubyClass createXmlModule(Ruby ruby, RubyModule xmlModule) { + RubyClass node = xmlModule.defineClassUnder("Node", ruby.getObject(), XML_NODE_ALLOCATOR); + node.defineAnnotatedMethods(XmlNode.class); + + RubyClass attr = xmlModule.defineClassUnder("Attr", node, XML_ATTR_ALLOCATOR); + attr.defineAnnotatedMethods(XmlAttr.class); + + RubyClass attrDecl = xmlModule.defineClassUnder("AttributeDecl", node, XML_ATTRIBUTE_DECL_ALLOCATOR); + attrDecl.defineAnnotatedMethods(XmlAttributeDecl.class); + + RubyClass characterData = xmlModule.defineClassUnder("CharacterData", node, null); + + RubyClass comment = xmlModule.defineClassUnder("Comment", characterData, XML_COMMENT_ALLOCATOR); + comment.defineAnnotatedMethods(XmlComment.class); + + RubyClass text = xmlModule.defineClassUnder("Text", characterData, XML_TEXT_ALLOCATOR); + text.defineAnnotatedMethods(XmlText.class); + + RubyModule cdata = xmlModule.defineClassUnder("CDATA", text, XML_CDATA_ALLOCATOR); + cdata.defineAnnotatedMethods(XmlCdata.class); + + RubyClass dtd = 
xmlModule.defineClassUnder("DTD", node, XML_DTD_ALLOCATOR); + dtd.defineAnnotatedMethods(XmlDtd.class); + + RubyClass documentFragment = xmlModule.defineClassUnder("DocumentFragment", node, XML_DOCUMENT_FRAGMENT_ALLOCATOR); + documentFragment.defineAnnotatedMethods(XmlDocumentFragment.class); + + RubyClass element = xmlModule.defineClassUnder("Element", node, XML_ELEMENT_ALLOCATOR); + element.defineAnnotatedMethods(XmlElement.class); + + RubyClass elementContent = xmlModule.defineClassUnder("ElementContent", ruby.getObject(), XML_ELEMENT_CONTENT_ALLOCATOR); + elementContent.defineAnnotatedMethods(XmlElementContent.class); + + RubyClass elementDecl = xmlModule.defineClassUnder("ElementDecl", node, XML_ELEMENT_DECL_ALLOCATOR); + elementDecl.defineAnnotatedMethods(XmlElementDecl.class); + + RubyClass entityDecl = xmlModule.defineClassUnder("EntityDecl", node, XML_ENTITY_DECL_ALLOCATOR); + entityDecl.defineAnnotatedMethods(XmlEntityDecl.class); + + entityDecl.defineConstant("INTERNAL_GENERAL", RubyFixnum.newFixnum(ruby, XmlEntityDecl.INTERNAL_GENERAL)); + entityDecl.defineConstant("EXTERNAL_GENERAL_PARSED", RubyFixnum.newFixnum(ruby, XmlEntityDecl.EXTERNAL_GENERAL_PARSED)); + entityDecl.defineConstant("EXTERNAL_GENERAL_UNPARSED", RubyFixnum.newFixnum(ruby, XmlEntityDecl.EXTERNAL_GENERAL_UNPARSED)); + entityDecl.defineConstant("INTERNAL_PARAMETER", RubyFixnum.newFixnum(ruby, XmlEntityDecl.INTERNAL_PARAMETER)); + entityDecl.defineConstant("EXTERNAL_PARAMETER", RubyFixnum.newFixnum(ruby, XmlEntityDecl.EXTERNAL_PARAMETER)); + entityDecl.defineConstant("INTERNAL_PREDEFINED", RubyFixnum.newFixnum(ruby, XmlEntityDecl.INTERNAL_PREDEFINED)); + + RubyClass entref = xmlModule.defineClassUnder("EntityReference", node, XML_ENTITY_REFERENCE_ALLOCATOR); + entref.defineAnnotatedMethods(XmlEntityReference.class); + + RubyClass namespace = xmlModule.defineClassUnder("Namespace", ruby.getObject(), XML_NAMESPACE_ALLOCATOR); + namespace.defineAnnotatedMethods(XmlNamespace.class); + + 
RubyClass nodeSet = xmlModule.defineClassUnder("NodeSet", ruby.getObject(), XML_NODESET_ALLOCATOR); + nodeSet.defineAnnotatedMethods(XmlNodeSet.class); + + RubyClass pi = xmlModule.defineClassUnder("ProcessingInstruction", node, XML_PROCESSING_INSTRUCTION_ALLOCATOR); + pi.defineAnnotatedMethods(XmlProcessingInstruction.class); + + RubyClass reader = xmlModule.defineClassUnder("Reader", ruby.getObject(), XML_READER_ALLOCATOR); + reader.defineAnnotatedMethods(XmlReader.class); + + RubyClass schema = xmlModule.defineClassUnder("Schema", ruby.getObject(), XML_SCHEMA_ALLOCATOR); + schema.defineAnnotatedMethods(XmlSchema.class); + + RubyClass relaxng = xmlModule.defineClassUnder("RelaxNG", schema, XML_RELAXNG_ALLOCATOR); + relaxng.defineAnnotatedMethods(XmlRelaxng.class); + + RubyClass xpathContext = xmlModule.defineClassUnder("XPathContext", ruby.getObject(), XML_XPATHCONTEXT_ALLOCATOR); + xpathContext.defineAnnotatedMethods(XmlXpathContext.class); + + return node; + } + + private void createHtmlModule(Ruby ruby, RubyModule htmlModule) { + RubyClass htmlElemDesc = htmlModule.defineClassUnder("ElementDescription", ruby.getObject(), HTML_ELEMENT_DESCRIPTION_ALLOCATOR); + htmlElemDesc.defineAnnotatedMethods(HtmlElementDescription.class); + + RubyClass htmlEntityLookup = htmlModule.defineClassUnder("EntityLookup", ruby.getObject(), HTML_ENTITY_LOOKUP_ALLOCATOR); + htmlEntityLookup.defineAnnotatedMethods(HtmlEntityLookup.class); + } + + private void createDocuments(Ruby ruby, RubyModule xmlModule, RubyModule htmlModule, RubyClass node) { + RubyClass xmlDocument = xmlModule.defineClassUnder("Document", node, XML_DOCUMENT_ALLOCATOR); + xmlDocument.defineAnnotatedMethods(XmlDocument.class); + + //RubyModule htmlDoc = html.defineOrGetClassUnder("Document", document); + RubyModule htmlDocument = htmlModule.defineClassUnder("Document", xmlDocument, HTML_DOCUMENT_ALLOCATOR); + htmlDocument.defineAnnotatedMethods(HtmlDocument.class); + } + + private void createSaxModule(Ruby ruby, 
RubyModule xmlSaxModule, RubyModule htmlSaxModule) { + RubyClass xmlSaxParserContext = xmlSaxModule.defineClassUnder("ParserContext", ruby.getObject(), XML_SAXPARSER_CONTEXT_ALLOCATOR); + xmlSaxParserContext.defineAnnotatedMethods(XmlSaxParserContext.class); + + RubyClass xmlSaxPushParser = xmlSaxModule.defineClassUnder("PushParser", ruby.getObject(), XML_SAXPUSHPARSER_ALLOCATOR); + xmlSaxPushParser.defineAnnotatedMethods(XmlSaxPushParser.class); + + RubyClass htmlSaxParserContext = htmlSaxModule.defineClassUnder("ParserContext", xmlSaxParserContext, HTML_SAXPARSER_CONTEXT_ALLOCATOR); + htmlSaxParserContext.defineAnnotatedMethods(HtmlSaxParserContext.class); + } + + private void createXsltModule(Ruby ruby, RubyModule xsltModule) { + RubyClass stylesheet = xsltModule.defineClassUnder("Stylesheet", ruby.getObject(), XSLT_STYLESHEET_ALLOCATOR); + stylesheet.defineAnnotatedMethods(XsltStylesheet.class); + xsltModule.defineAnnotatedMethod(XsltStylesheet.class, "register"); + } + + private static ObjectAllocator ENCODING_HANDLER_ALLOCATOR = new ObjectAllocator() { + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + return new EncodingHandler(runtime, klazz, ""); + } + }; + + public static final ObjectAllocator HTML_DOCUMENT_ALLOCATOR = new ObjectAllocator() { + private HtmlDocument htmlDocument = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (htmlDocument == null) htmlDocument = new HtmlDocument(runtime, klazz); + try { + HtmlDocument clone = (HtmlDocument) htmlDocument.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new HtmlDocument(runtime, klazz); + } + } + }; + + public static final ObjectAllocator HTML_SAXPARSER_CONTEXT_ALLOCATOR = new ObjectAllocator() { + private HtmlSaxParserContext htmlSaxParserContext = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (htmlSaxParserContext == null) htmlSaxParserContext = new HtmlSaxParserContext(runtime, 
klazz); + try { + HtmlSaxParserContext clone = (HtmlSaxParserContext) htmlSaxParserContext.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new HtmlSaxParserContext(runtime, klazz); + } + } + }; + + private static ObjectAllocator HTML_ELEMENT_DESCRIPTION_ALLOCATOR = + new ObjectAllocator() { + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + return new HtmlElementDescription(runtime, klazz); + } + }; + + private static ObjectAllocator HTML_ENTITY_LOOKUP_ALLOCATOR = + new ObjectAllocator() { + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + return new HtmlEntityLookup(runtime, klazz); + } + }; + + public static final ObjectAllocator XML_ATTR_ALLOCATOR = new ObjectAllocator() { + private XmlAttr xmlAttr = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlAttr == null) xmlAttr = new XmlAttr(runtime, klazz); + try { + XmlAttr clone = (XmlAttr) xmlAttr.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlAttr(runtime, klazz); + } + } + }; + + public static final ObjectAllocator XML_CDATA_ALLOCATOR = new ObjectAllocator() { + private XmlCdata xmlCdata = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlCdata == null) xmlCdata = new XmlCdata(runtime, klazz); + try { + XmlCdata clone = (XmlCdata) xmlCdata.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlCdata(runtime, klazz); + } + } + }; + + public static final ObjectAllocator XML_COMMENT_ALLOCATOR = new ObjectAllocator() { + private XmlComment xmlComment = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlComment == null) xmlComment = new XmlComment(runtime, klazz); + try { + XmlComment clone = (XmlComment) xmlComment.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new 
XmlComment(runtime, klazz); + } + } + }; + + public static final ObjectAllocator XML_DOCUMENT_ALLOCATOR = new ObjectAllocator() { + private XmlDocument xmlDocument = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlDocument == null) xmlDocument = new XmlDocument(runtime, klazz); + try { + XmlDocument clone = (XmlDocument) xmlDocument.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlDocument(runtime, klazz); + } + } + }; + + public static final ObjectAllocator XML_DOCUMENT_FRAGMENT_ALLOCATOR = new ObjectAllocator() { + private XmlDocumentFragment xmlDocumentFragment = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlDocumentFragment == null) xmlDocumentFragment = new XmlDocumentFragment(runtime, klazz); + try { + XmlDocumentFragment clone = (XmlDocumentFragment)xmlDocumentFragment.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlDocumentFragment(runtime, klazz); + } + } + }; + + public static final ObjectAllocator XML_DTD_ALLOCATOR = new ObjectAllocator() { + private XmlDtd xmlDtd = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlDtd == null) xmlDtd = new XmlDtd(runtime, klazz); + try { + XmlDtd clone = (XmlDtd)xmlDtd.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlDtd(runtime, klazz); + } + } + }; + + public static final ObjectAllocator XML_ELEMENT_ALLOCATOR = new ObjectAllocator() { + private XmlElement xmlElement = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlElement == null) xmlElement = new XmlElement(runtime, klazz); + try { + XmlElement clone = (XmlElement)xmlElement.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlElement(runtime, klazz); + } + } + }; + + public static ObjectAllocator 
XML_ELEMENT_DECL_ALLOCATOR = new ObjectAllocator() { + private XmlElementDecl xmlElementDecl = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlElementDecl == null) xmlElementDecl = new XmlElementDecl(runtime, klazz); + try { + XmlElementDecl clone = (XmlElementDecl)xmlElementDecl.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlElementDecl(runtime, klazz); + } + } + }; + + public static ObjectAllocator XML_ENTITY_REFERENCE_ALLOCATOR = new ObjectAllocator() { + private XmlEntityReference xmlEntityRef = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlEntityRef == null) xmlEntityRef = new XmlEntityReference(runtime, klazz); + try { + XmlEntityReference clone = (XmlEntityReference)xmlEntityRef.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlEntityReference(runtime, klazz); + } + } + }; + + public static final ObjectAllocator XML_NAMESPACE_ALLOCATOR = new ObjectAllocator() { + private XmlNamespace xmlNamespace = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlNamespace == null) xmlNamespace = new XmlNamespace(runtime, klazz); + try { + XmlNamespace clone = (XmlNamespace) xmlNamespace.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlNamespace(runtime, klazz); + } + } + }; + + public static final ObjectAllocator XML_NODE_ALLOCATOR = new ObjectAllocator() { + private XmlNode xmlNode = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlNode == null) xmlNode = new XmlNode(runtime, klazz); + try { + XmlNode clone = (XmlNode) xmlNode.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlNode(runtime, klazz); + } + } + }; + + public static final ObjectAllocator XML_NODESET_ALLOCATOR = new ObjectAllocator() { + 
private XmlNodeSet xmlNodeSet = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlNodeSet == null) xmlNodeSet = new XmlNodeSet(runtime, klazz); + try { + XmlNodeSet clone = (XmlNodeSet) xmlNodeSet.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + xmlNodeSet = new XmlNodeSet(runtime, klazz); + xmlNodeSet.setNodes(RubyArray.newEmptyArray(runtime)); + return xmlNodeSet; + } + } + }; + + public static ObjectAllocator XML_PROCESSING_INSTRUCTION_ALLOCATOR = new ObjectAllocator() { + private XmlProcessingInstruction xmlProcessingInstruction = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlProcessingInstruction == null) xmlProcessingInstruction = new XmlProcessingInstruction(runtime, klazz); + try { + XmlProcessingInstruction clone = (XmlProcessingInstruction)xmlProcessingInstruction.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlProcessingInstruction(runtime, klazz); + } + } + }; + + public static ObjectAllocator XML_READER_ALLOCATOR = new ObjectAllocator() { + private XmlReader xmlReader = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlReader == null) xmlReader = new XmlReader(runtime, klazz); + try { + XmlReader clone = (XmlReader) xmlReader.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + xmlReader = new XmlReader(runtime, klazz); + return xmlReader; + } + } + }; + + private static ObjectAllocator XML_ATTRIBUTE_DECL_ALLOCATOR = new ObjectAllocator() { + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + return new XmlAttributeDecl(runtime, klazz); + } + }; + + private static ObjectAllocator XML_ENTITY_DECL_ALLOCATOR = new ObjectAllocator() { + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + return new XmlEntityDecl(runtime, klazz); + } + }; + + private static ObjectAllocator 
XML_ELEMENT_CONTENT_ALLOCATOR = new ObjectAllocator() { + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + throw runtime.newNotImplementedError("not implemented"); + } + }; + + public static final ObjectAllocator XML_RELAXNG_ALLOCATOR = new ObjectAllocator() { + private XmlRelaxng xmlRelaxng = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlRelaxng == null) xmlRelaxng = new XmlRelaxng(runtime, klazz); + try { + XmlRelaxng clone = (XmlRelaxng) xmlRelaxng.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlRelaxng(runtime, klazz); + } + } + }; + + public static final ObjectAllocator XML_SAXPARSER_CONTEXT_ALLOCATOR = new ObjectAllocator() { + private XmlSaxParserContext xmlSaxParserContext = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlSaxParserContext == null) xmlSaxParserContext = new XmlSaxParserContext(runtime, klazz); + try { + XmlSaxParserContext clone = (XmlSaxParserContext) xmlSaxParserContext.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlSaxParserContext(runtime, klazz); + } + } + }; + + private static ObjectAllocator XML_SAXPUSHPARSER_ALLOCATOR = new ObjectAllocator() { + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + return new XmlSaxPushParser(runtime, klazz); + } + }; + + public static final ObjectAllocator XML_SCHEMA_ALLOCATOR = new ObjectAllocator() { + private XmlSchema xmlSchema = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlSchema == null) xmlSchema = new XmlSchema(runtime, klazz); + try { + XmlSchema clone = (XmlSchema) xmlSchema.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlSchema(runtime, klazz); + } + } + }; + + public static final ObjectAllocator XML_SYNTAXERROR_ALLOCATOR = new ObjectAllocator() { + private XmlSyntaxError 
xmlSyntaxError = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlSyntaxError == null) xmlSyntaxError = new XmlSyntaxError(runtime, klazz); + try { + XmlSyntaxError clone = (XmlSyntaxError) xmlSyntaxError.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlSyntaxError(runtime, klazz); + } + } + }; + + public static final ObjectAllocator XML_TEXT_ALLOCATOR = new ObjectAllocator() { + private XmlText xmlText = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlText == null) xmlText = new XmlText(runtime, klazz); + try { + XmlText clone = (XmlText) xmlText.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlText(runtime, klazz); + } + } + }; + + public static ObjectAllocator XML_XPATHCONTEXT_ALLOCATOR = new ObjectAllocator() { + private XmlXpathContext xmlXpathContext = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xmlXpathContext == null) xmlXpathContext = new XmlXpathContext(runtime, klazz); + try { + XmlXpathContext clone = (XmlXpathContext) xmlXpathContext.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XmlXpathContext(runtime, klazz); + } + } + }; + + /* Allocator for XSLT stylesheets: clones a lazily created prototype and rebinds the clone's metaclass to klazz; if cloning is unsupported it falls back to constructing a fresh XsltStylesheet. */ + public static ObjectAllocator XSLT_STYLESHEET_ALLOCATOR = new ObjectAllocator() { + private XsltStylesheet xsltStylesheet = null; + public IRubyObject allocate(Ruby runtime, RubyClass klazz) { + if (xsltStylesheet == null) xsltStylesheet = new XsltStylesheet(runtime, klazz); + try { + XsltStylesheet clone = (XsltStylesheet) xsltStylesheet.clone(); + clone.setMetaClass(klazz); + return clone; + } catch (CloneNotSupportedException e) { + return new XsltStylesheet(runtime, klazz); + } + } + }; +} diff --git a/test/fixtures/java/clojure-type.java b/test/fixtures/java/clojure-type.java new file mode 100644 index 00000000..4cde14a1 --- /dev/null +++
b/test/fixtures/java/clojure-type.java @@ -0,0 +1,872 @@ +/*** + * ASM: a very small and fast Java bytecode manipulation framework + * Copyright (c) 2000-2005 INRIA, France Telecom + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ +package clojure.asm; + +import java.lang.reflect.Constructor; +import java.lang.reflect.Method; + +/** + * A Java type. This class can be used to make it easier to manipulate type and + * method descriptors. 
+ * + * @author Eric Bruneton + * @author Chris Nokleberg + */ +public class Type{ + +/** + * The sort of the void type. See {@link #getSort getSort}. + */ +public final static int VOID = 0; + +/** + * The sort of the boolean type. See {@link #getSort getSort}. + */ +public final static int BOOLEAN = 1; + +/** + * The sort of the char type. See {@link #getSort getSort}. + */ +public final static int CHAR = 2; + +/** + * The sort of the byte type. See {@link #getSort getSort}. + */ +public final static int BYTE = 3; + +/** + * The sort of the short type. See {@link #getSort getSort}. + */ +public final static int SHORT = 4; + +/** + * The sort of the int type. See {@link #getSort getSort}. + */ +public final static int INT = 5; + +/** + * The sort of the float type. See {@link #getSort getSort}. + */ +public final static int FLOAT = 6; + +/** + * The sort of the long type. See {@link #getSort getSort}. + */ +public final static int LONG = 7; + +/** + * The sort of the double type. See {@link #getSort getSort}. + */ +public final static int DOUBLE = 8; + +/** + * The sort of array reference types. See {@link #getSort getSort}. + */ +public final static int ARRAY = 9; + +/** + * The sort of object reference type. See {@link #getSort getSort}. + */ +public final static int OBJECT = 10; + +/** + * The void type. + */ +public final static Type VOID_TYPE = new Type(VOID); + +/** + * The boolean type. + */ +public final static Type BOOLEAN_TYPE = new Type(BOOLEAN); + +/** + * The char type. + */ +public final static Type CHAR_TYPE = new Type(CHAR); + +/** + * The byte type. + */ +public final static Type BYTE_TYPE = new Type(BYTE); + +/** + * The short type. + */ +public final static Type SHORT_TYPE = new Type(SHORT); + +/** + * The int type. + */ +public final static Type INT_TYPE = new Type(INT); + +/** + * The float type. + */ +public final static Type FLOAT_TYPE = new Type(FLOAT); + +/** + * The long type. 
+ */ +public final static Type LONG_TYPE = new Type(LONG); + +/** + * The double type. + */ +public final static Type DOUBLE_TYPE = new Type(DOUBLE); + +// ------------------------------------------------------------------------ +// Fields +// ------------------------------------------------------------------------ + +/** + * The sort of this Java type. + */ +private final int sort; + +/** + * A buffer containing the descriptor of this Java type. This field is only + * used for reference types. + */ +private char[] buf; + +/** + * The offset of the descriptor of this Java type in {@link #buf buf}. This + * field is only used for reference types. + */ +private int off; + +/** + * The length of the descriptor of this Java type. + */ +private int len; + +// ------------------------------------------------------------------------ +// Constructors +// ------------------------------------------------------------------------ + +/** + * Constructs a primitive type. + * + * @param sort the sort of the primitive type to be constructed. + */ +private Type(final int sort){ + this.sort = sort; + this.len = 1; +} + +/** + * Constructs a reference type. + * + * @param sort the sort of the reference type to be constructed. + * @param buf a buffer containing the descriptor of the previous type. + * @param off the offset of this descriptor in the previous buffer. + * @param len the length of this descriptor. + */ +private Type(final int sort, final char[] buf, final int off, final int len){ + this.sort = sort; + this.buf = buf; + this.off = off; + this.len = len; +} + +/** + * Returns the Java type corresponding to the given type descriptor. + * + * @param typeDescriptor a type descriptor. + * @return the Java type corresponding to the given type descriptor. + */ +public static Type getType(final String typeDescriptor){ + return getType(typeDescriptor.toCharArray(), 0); +} + +/** + * Returns the Java type corresponding to the given class. + * + * @param c a class. 
+ * @return the Java type corresponding to the given class. + */ +public static Type getType(final Class c){ + if(c.isPrimitive()) + { + if(c == Integer.TYPE) + { + return INT_TYPE; + } + else if(c == Void.TYPE) + { + return VOID_TYPE; + } + else if(c == Boolean.TYPE) + { + return BOOLEAN_TYPE; + } + else if(c == Byte.TYPE) + { + return BYTE_TYPE; + } + else if(c == Character.TYPE) + { + return CHAR_TYPE; + } + else if(c == Short.TYPE) + { + return SHORT_TYPE; + } + else if(c == Double.TYPE) + { + return DOUBLE_TYPE; + } + else if(c == Float.TYPE) + { + return FLOAT_TYPE; + } + else /* if (c == Long.TYPE) */ + { + return LONG_TYPE; + } + } + else + { + return getType(getDescriptor(c)); + } +} + +/** + * Returns the {@link Type#OBJECT} type for the given internal class name. + * This is a shortcut method for Type.getType("L"+name+";"). + * Note that opposed to {@link Type#getType(String)}, this method takes + * internal class names and not class descriptor. + * + * @param name an internal class name. + * @return the the {@link Type#OBJECT} type for the given class name. + */ +public static Type getObjectType(String name){ + int l = name.length(); + char[] buf = new char[l + 2]; + buf[0] = 'L'; + buf[l + 1] = ';'; + name.getChars(0, l, buf, 1); + return new Type(OBJECT, buf, 0, l + 2); +} + +/** + * Returns the Java types corresponding to the argument types of the given + * method descriptor. + * + * @param methodDescriptor a method descriptor. + * @return the Java types corresponding to the argument types of the given + * method descriptor. 
+ */ +public static Type[] getArgumentTypes(final String methodDescriptor){ + char[] buf = methodDescriptor.toCharArray(); + int off = 1; + int size = 0; + while(true) + { + char car = buf[off++]; + if(car == ')') + { + break; + } + else if(car == 'L') + { + while(buf[off++] != ';') + { + } + ++size; + } + else if(car != '[') + { + ++size; + } + } + Type[] args = new Type[size]; + off = 1; + size = 0; + while(buf[off] != ')') + { + args[size] = getType(buf, off); + off += args[size].len; + size += 1; + } + return args; +} + +/** + * Returns the Java types corresponding to the argument types of the given + * method. + * + * @param method a method. + * @return the Java types corresponding to the argument types of the given + * method. + */ +public static Type[] getArgumentTypes(final Method method){ + Class[] classes = method.getParameterTypes(); + Type[] types = new Type[classes.length]; + for(int i = classes.length - 1; i >= 0; --i) + { + types[i] = getType(classes[i]); + } + return types; +} + +/** + * Returns the Java type corresponding to the return type of the given + * method descriptor. + * + * @param methodDescriptor a method descriptor. + * @return the Java type corresponding to the return type of the given + * method descriptor. + */ +public static Type getReturnType(final String methodDescriptor){ + char[] buf = methodDescriptor.toCharArray(); + return getType(buf, methodDescriptor.indexOf(')') + 1); +} + +/** + * Returns the Java type corresponding to the return type of the given + * method. + * + * @param method a method. + * @return the Java type corresponding to the return type of the given + * method. + */ +public static Type getReturnType(final Method method){ + return getType(method.getReturnType()); +} + +/** + * Returns the Java type corresponding to the given type descriptor. + * + * @param buf a buffer containing a type descriptor. + * @param off the offset of this descriptor in the previous buffer. 
+ * @return the Java type corresponding to the given type descriptor. + */ +private static Type getType(final char[] buf, final int off){ + int len; + switch(buf[off]) + { + case'V': + return VOID_TYPE; + case'Z': + return BOOLEAN_TYPE; + case'C': + return CHAR_TYPE; + case'B': + return BYTE_TYPE; + case'S': + return SHORT_TYPE; + case'I': + return INT_TYPE; + case'F': + return FLOAT_TYPE; + case'J': + return LONG_TYPE; + case'D': + return DOUBLE_TYPE; + case'[': + len = 1; + while(buf[off + len] == '[') + { + ++len; + } + if(buf[off + len] == 'L') + { + ++len; + while(buf[off + len] != ';') + { + ++len; + } + } + return new Type(ARRAY, buf, off, len + 1); + // case 'L': + default: + len = 1; + while(buf[off + len] != ';') + { + ++len; + } + return new Type(OBJECT, buf, off, len + 1); + } +} + +// ------------------------------------------------------------------------ +// Accessors +// ------------------------------------------------------------------------ + +/** + * Returns the sort of this Java type. + * + * @return {@link #VOID VOID}, {@link #BOOLEAN BOOLEAN}, + * {@link #CHAR CHAR}, {@link #BYTE BYTE}, {@link #SHORT SHORT}, + * {@link #INT INT}, {@link #FLOAT FLOAT}, {@link #LONG LONG}, + * {@link #DOUBLE DOUBLE}, {@link #ARRAY ARRAY} or + * {@link #OBJECT OBJECT}. + */ +public int getSort(){ + return sort; +} + +/** + * Returns the number of dimensions of this array type. This method should + * only be used for an array type. + * + * @return the number of dimensions of this array type. + */ +public int getDimensions(){ + int i = 1; + while(buf[off + i] == '[') + { + ++i; + } + return i; +} + +/** + * Returns the type of the elements of this array type. This method should + * only be used for an array type. + * + * @return Returns the type of the elements of this array type. + */ +public Type getElementType(){ + return getType(buf, off + getDimensions()); +} + +/** + * Returns the name of the class corresponding to this type. 
+ * + * @return the fully qualified name of the class corresponding to this type. + */ +public String getClassName(){ + switch(sort) + { + case VOID: + return "void"; + case BOOLEAN: + return "boolean"; + case CHAR: + return "char"; + case BYTE: + return "byte"; + case SHORT: + return "short"; + case INT: + return "int"; + case FLOAT: + return "float"; + case LONG: + return "long"; + case DOUBLE: + return "double"; + case ARRAY: + StringBuffer b = new StringBuffer(getElementType().getClassName()); + for(int i = getDimensions(); i > 0; --i) + { + b.append("[]"); + } + return b.toString(); + // case OBJECT: + default: + return new String(buf, off + 1, len - 2).replace('/', '.'); + } +} + +/** + * Returns the internal name of the class corresponding to this object type. + * The internal name of a class is its fully qualified name, where '.' are + * replaced by '/'. This method should only be used for an object type. + * + * @return the internal name of the class corresponding to this object type. + */ +public String getInternalName(){ + return new String(buf, off + 1, len - 2); +} + +// ------------------------------------------------------------------------ +// Conversion to type descriptors +// ------------------------------------------------------------------------ + +/** + * Returns the descriptor corresponding to this Java type. + * + * @return the descriptor corresponding to this Java type. + */ +public String getDescriptor(){ + StringBuffer buf = new StringBuffer(); + getDescriptor(buf); + return buf.toString(); +} + +/** + * Returns the descriptor corresponding to the given argument and return + * types. + * + * @param returnType the return type of the method. + * @param argumentTypes the argument types of the method. + * @return the descriptor corresponding to the given argument and return + * types. 
+ */ +public static String getMethodDescriptor( + final Type returnType, + final Type[] argumentTypes){ + StringBuffer buf = new StringBuffer(); + buf.append('('); + for(int i = 0; i < argumentTypes.length; ++i) + { + argumentTypes[i].getDescriptor(buf); + } + buf.append(')'); + returnType.getDescriptor(buf); + return buf.toString(); +} + +/** + * Appends the descriptor corresponding to this Java type to the given + * string buffer. + * + * @param buf the string buffer to which the descriptor must be appended. + */ +private void getDescriptor(final StringBuffer buf){ + switch(sort) + { + case VOID: + buf.append('V'); + return; + case BOOLEAN: + buf.append('Z'); + return; + case CHAR: + buf.append('C'); + return; + case BYTE: + buf.append('B'); + return; + case SHORT: + buf.append('S'); + return; + case INT: + buf.append('I'); + return; + case FLOAT: + buf.append('F'); + return; + case LONG: + buf.append('J'); + return; + case DOUBLE: + buf.append('D'); + return; + // case ARRAY: + // case OBJECT: + default: + buf.append(this.buf, off, len); + } +} + +// ------------------------------------------------------------------------ +// Direct conversion from classes to type descriptors, +// without intermediate Type objects +// ------------------------------------------------------------------------ + +/** + * Returns the internal name of the given class. The internal name of a + * class is its fully qualified name, where '.' are replaced by '/'. + * + * @param c an object class. + * @return the internal name of the given class. + */ +public static String getInternalName(final Class c){ + return c.getName().replace('.', '/'); +} + +/** + * Returns the descriptor corresponding to the given Java type. + * + * @param c an object class, a primitive class or an array class. + * @return the descriptor corresponding to the given class. 
+ */ +public static String getDescriptor(final Class c){ + StringBuffer buf = new StringBuffer(); + getDescriptor(buf, c); + return buf.toString(); +} + +/** + * Returns the descriptor corresponding to the given constructor. + * + * @param c a {@link Constructor Constructor} object. + * @return the descriptor of the given constructor. + */ +public static String getConstructorDescriptor(final Constructor c){ + Class[] parameters = c.getParameterTypes(); + StringBuffer buf = new StringBuffer(); + buf.append('('); + for(int i = 0; i < parameters.length; ++i) + { + getDescriptor(buf, parameters[i]); + } + return buf.append(")V").toString(); +} + +/** + * Returns the descriptor corresponding to the given method. + * + * @param m a {@link Method Method} object. + * @return the descriptor of the given method. + */ +public static String getMethodDescriptor(final Method m){ + Class[] parameters = m.getParameterTypes(); + StringBuffer buf = new StringBuffer(); + buf.append('('); + for(int i = 0; i < parameters.length; ++i) + { + getDescriptor(buf, parameters[i]); + } + buf.append(')'); + getDescriptor(buf, m.getReturnType()); + return buf.toString(); +} + +/** + * Appends the descriptor of the given class to the given string buffer. + * + * @param buf the string buffer to which the descriptor must be appended. + * @param c the class whose descriptor must be computed. 
+ */ +private static void getDescriptor(final StringBuffer buf, final Class c){ + Class d = c; + while(true) + { + if(d.isPrimitive()) + { + char car; + if(d == Integer.TYPE) + { + car = 'I'; + } + else if(d == Void.TYPE) + { + car = 'V'; + } + else if(d == Boolean.TYPE) + { + car = 'Z'; + } + else if(d == Byte.TYPE) + { + car = 'B'; + } + else if(d == Character.TYPE) + { + car = 'C'; + } + else if(d == Short.TYPE) + { + car = 'S'; + } + else if(d == Double.TYPE) + { + car = 'D'; + } + else if(d == Float.TYPE) + { + car = 'F'; + } + else /* if (d == Long.TYPE) */ + { + car = 'J'; + } + buf.append(car); + return; + } + else if(d.isArray()) + { + buf.append('['); + d = d.getComponentType(); + } + else + { + buf.append('L'); + String name = d.getName(); + int len = name.length(); + for(int i = 0; i < len; ++i) + { + char car = name.charAt(i); + buf.append(car == '.' ? '/' : car); + } + buf.append(';'); + return; + } + } +} + +// ------------------------------------------------------------------------ +// Corresponding size and opcodes +// ------------------------------------------------------------------------ + +/** + * Returns the size of values of this type. + * + * @return the size of values of this type, i.e., 2 for long and + * double, and 1 otherwise. + */ +public int getSize(){ + return sort == LONG || sort == DOUBLE ? 2 : 1; +} + +/** + * Returns a JVM instruction opcode adapted to this Java type. + * + * @param opcode a JVM instruction opcode. This opcode must be one of ILOAD, + * ISTORE, IALOAD, IASTORE, IADD, ISUB, IMUL, IDIV, IREM, INEG, ISHL, + * ISHR, IUSHR, IAND, IOR, IXOR and IRETURN. + * @return an opcode that is similar to the given opcode, but adapted to + * this Java type. For example, if this type is float and + * opcode is IRETURN, this method returns FRETURN. 
+ */ +public int getOpcode(final int opcode){ + if(opcode == Opcodes.IALOAD || opcode == Opcodes.IASTORE) + { + switch(sort) + { + case BOOLEAN: + case BYTE: + return opcode + 5; + case CHAR: + return opcode + 6; + case SHORT: + return opcode + 7; + case INT: + return opcode; + case FLOAT: + return opcode + 2; + case LONG: + return opcode + 1; + case DOUBLE: + return opcode + 3; + // case ARRAY: + // case OBJECT: + default: + return opcode + 4; + } + } + else + { + switch(sort) + { + case VOID: + return opcode + 5; + case BOOLEAN: + case CHAR: + case BYTE: + case SHORT: + case INT: + return opcode; + case FLOAT: + return opcode + 2; + case LONG: + return opcode + 1; + case DOUBLE: + return opcode + 3; + // case ARRAY: + // case OBJECT: + default: + return opcode + 4; + } + } +} + +// ------------------------------------------------------------------------ +// Equals, hashCode and toString +// ------------------------------------------------------------------------ + +/** + * Tests if the given object is equal to this type. + * + * @param o the object to be compared to this type. + * @return true if the given object is equal to this type. + */ +public boolean equals(final Object o){ + if(this == o) + { + return true; + } + if(!(o instanceof Type)) + { + return false; + } + Type t = (Type) o; + if(sort != t.sort) + { + return false; + } + if(sort == Type.OBJECT || sort == Type.ARRAY) + { + if(len != t.len) + { + return false; + } + for(int i = off, j = t.off, end = i + len; i < end; i++, j++) + { + if(buf[i] != t.buf[j]) + { + return false; + } + } + } + return true; +} + +/** + * Returns a hash code value for this type. + * + * @return a hash code value for this type. + */ +public int hashCode(){ + int hc = 13 * sort; + if(sort == Type.OBJECT || sort == Type.ARRAY) + { + for(int i = off, end = i + len; i < end; i++) + { + hc = 17 * (hc + buf[i]); + } + } + return hc; +} + +/** + * Returns a string representation of this type. 
+ * + * @return the descriptor of this type. + */ +public String toString(){ + return getDescriptor(); +} +} diff --git a/test/fixtures/java/clojure-util.java b/test/fixtures/java/clojure-util.java new file mode 100644 index 00000000..02b5466f --- /dev/null +++ b/test/fixtures/java/clojure-util.java @@ -0,0 +1,197 @@ +/** + * Copyright (c) Rich Hickey. All rights reserved. + * The use and distribution terms for this software are covered by the + * Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php) + * which can be found in the file epl-v10.html at the root of this distribution. + * By using this software in any fashion, you are agreeing to be bound by + * the terms of this license. + * You must not remove this notice, or any other, from this software. + **/ + +/* rich Apr 19, 2008 */ + +package clojure.lang; + +import java.lang.ref.Reference; +import java.math.BigInteger; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.lang.ref.SoftReference; +import java.lang.ref.ReferenceQueue; + +public class Util{ +static public boolean equiv(Object k1, Object k2){ + if(k1 == k2) + return true; + if(k1 != null) + { + if(k1 instanceof Number && k2 instanceof Number) + return Numbers.equal((Number)k1, (Number)k2); + else if(k1 instanceof IPersistentCollection || k2 instanceof IPersistentCollection) + return pcequiv(k1,k2); + return k1.equals(k2); + } + return false; +} + +static public boolean equiv(long k1, long k2){ + return k1 == k2; +} + +static public boolean equiv(Object k1, long k2){ + return equiv(k1, (Object)k2); +} + +static public boolean equiv(long k1, Object k2){ + return equiv((Object)k1, k2); +} + +static public boolean equiv(double k1, double k2){ + return k1 == k2; +} + +static public boolean equiv(Object k1, double k2){ + return equiv(k1, (Object)k2); +} + +static public boolean equiv(double k1, Object k2){ + return equiv((Object)k1, k2); +} + +static public boolean equiv(boolean k1, boolean k2){ + 
return k1 == k2; +} + +static public boolean equiv(Object k1, boolean k2){ + return equiv(k1, (Object)k2); +} + +static public boolean equiv(boolean k1, Object k2){ + return equiv((Object)k1, k2); +} + +static public boolean equiv(char c1, char c2) { + return c1 == c2; +} + +static public boolean pcequiv(Object k1, Object k2){ + if(k1 instanceof IPersistentCollection) + return ((IPersistentCollection)k1).equiv(k2); + return ((IPersistentCollection)k2).equiv(k1); +} + +static public boolean equals(Object k1, Object k2){ + if(k1 == k2) + return true; + return k1 != null && k1.equals(k2); +} + +static public boolean identical(Object k1, Object k2){ + return k1 == k2; +} + +static public Class classOf(Object x){ + if(x != null) + return x.getClass(); + return null; +} + +static public int compare(Object k1, Object k2){ + if(k1 == k2) + return 0; + if(k1 != null) + { + if(k2 == null) + return 1; + if(k1 instanceof Number) + return Numbers.compare((Number) k1, (Number) k2); + return ((Comparable) k1).compareTo(k2); + } + return -1; +} + +static public int hash(Object o){ + if(o == null) + return 0; + return o.hashCode(); +} + +static public int hasheq(Object o){ + if(o == null) + return 0; + if(o instanceof Number) + return Numbers.hasheq((Number)o); + else if(o instanceof IHashEq) + return ((IHashEq)o).hasheq(); + return o.hashCode(); +} + +static public int hashCombine(int seed, int hash){ + //a la boost + seed ^= hash + 0x9e3779b9 + (seed << 6) + (seed >> 2); + return seed; +} + +static public boolean isPrimitive(Class c){ + return c != null && c.isPrimitive() && !(c == Void.TYPE); +} + +static public boolean isInteger(Object x){ + return x instanceof Integer + || x instanceof Long + || x instanceof BigInt + || x instanceof BigInteger; +} + +static public Object ret1(Object ret, Object nil){ + return ret; +} + +static public ISeq ret1(ISeq ret, Object nil){ + return ret; +} + +static public void clearCache(ReferenceQueue rq, ConcurrentHashMap> cache){ + //cleanup any 
dead entries + if(rq.poll() != null) + { + while(rq.poll() != null) + ; + for(Map.Entry> e : cache.entrySet()) + { + Reference val = e.getValue(); + if(val != null && val.get() == null) + cache.remove(e.getKey(), val); + } + } +} + +static public RuntimeException runtimeException(String s){ + return new RuntimeException(s); +} + +static public RuntimeException runtimeException(String s, Throwable e){ + return new RuntimeException(s, e); +} + +/** + * Throw even checked exceptions without being required + * to declare them or catch them. Suggested idiom: + *

+ * throw sneakyThrow( some exception ); + */ +static public RuntimeException sneakyThrow(Throwable t) { + // http://www.mail-archive.com/javaposse@googlegroups.com/msg05984.html + if (t == null) + throw new NullPointerException(); + Util.sneakyThrow0(t); + return null; +} + +@SuppressWarnings("unchecked") +static private void sneakyThrow0(Throwable t) throws T { + throw (T) t; +} + +} + diff --git a/test/fixtures/javascript/bootstrap-modal.js b/test/fixtures/javascript/bootstrap-modal.js new file mode 100644 index 00000000..38fd0c84 --- /dev/null +++ b/test/fixtures/javascript/bootstrap-modal.js @@ -0,0 +1,218 @@ +/* ========================================================= + * bootstrap-modal.js v2.0.4 + * http://twitter.github.com/bootstrap/javascript.html#modals + * ========================================================= + * Copyright 2012 Twitter, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * ========================================================= */ + + +!function ($) { + + "use strict"; // jshint ;_; + + + /* MODAL CLASS DEFINITION + * ====================== */ + + var Modal = function (content, options) { + this.options = options + this.$element = $(content) + .delegate('[data-dismiss="modal"]', 'click.dismiss.modal', $.proxy(this.hide, this)) + } + + Modal.prototype = { + + constructor: Modal + + , toggle: function () { + return this[!this.isShown ? 
'show' : 'hide']() + } + + , show: function () { + var that = this + , e = $.Event('show') + + this.$element.trigger(e) + + if (this.isShown || e.isDefaultPrevented()) return + + $('body').addClass('modal-open') + + this.isShown = true + + escape.call(this) + backdrop.call(this, function () { + var transition = $.support.transition && that.$element.hasClass('fade') + + if (!that.$element.parent().length) { + that.$element.appendTo(document.body) //don't move modals dom position + } + + that.$element + .show() + + if (transition) { + that.$element[0].offsetWidth // force reflow + } + + that.$element.addClass('in') + + transition ? + that.$element.one($.support.transition.end, function () { that.$element.trigger('shown') }) : + that.$element.trigger('shown') + + }) + } + + , hide: function (e) { + e && e.preventDefault() + + var that = this + + e = $.Event('hide') + + this.$element.trigger(e) + + if (!this.isShown || e.isDefaultPrevented()) return + + this.isShown = false + + $('body').removeClass('modal-open') + + escape.call(this) + + this.$element.removeClass('in') + + $.support.transition && this.$element.hasClass('fade') ? + hideWithTransition.call(this) : + hideModal.call(this) + } + + } + + + /* MODAL PRIVATE METHODS + * ===================== */ + + function hideWithTransition() { + var that = this + , timeout = setTimeout(function () { + that.$element.off($.support.transition.end) + hideModal.call(that) + }, 500) + + this.$element.one($.support.transition.end, function () { + clearTimeout(timeout) + hideModal.call(that) + }) + } + + function hideModal(that) { + this.$element + .hide() + .trigger('hidden') + + backdrop.call(this) + } + + function backdrop(callback) { + var that = this + , animate = this.$element.hasClass('fade') ? 'fade' : '' + + if (this.isShown && this.options.backdrop) { + var doAnimate = $.support.transition && animate + + this.$backdrop = $('