Commit fcab3591 authored by Duy Nguyen's avatar Duy Nguyen

Merge remote-tracking branch 'pclouds/backup-log' into HEAD

parents 6eeb9f0b b80de167
......@@ -21,6 +21,7 @@
/git-apply
/git-archimport
/git-archive
/git-backup-log
/git-bisect
/git-bisect--helper
/git-blame
......
......@@ -319,6 +319,11 @@ This value is true by default in a repository that has
a working directory associated with it, and false by
default in a bare repository.
core.backupLog::
If true, many commands will keep backup content in object database
before they modify some file. See linkgit:git-backup-log[1] for more
information.
core.repositoryFormatVersion::
Internal variable identifying the repository format and layout
version.
......
......@@ -250,6 +250,9 @@ When `git apply` is used as a "better GNU patch", the user can pass
the `--unsafe-paths` option to override this safety check. This option
has no effect when `--index` or `--cached` is in use.
--keep-backup::
Enable index backup log when `--cached` or `--index` is used.
CONFIGURATION
-------------
......
git-backup-log(1)
=================
NAME
----
git-backup-log - Manage backup log files
SYNOPSIS
--------
[verse]
'git backup-log' [--id=<id> | --path=<path>] log [<rev-options>] [[--] <path>...]
'git backup-log' [--id=<id> | --path=<path>] cat [--before] [--hash] <change-id> <path>
'git backup-log' [--id=<id> | --path=<path>] diff [<diff-options>] <change-id>
'git backup-log' [--id=<id> | --path=<path>] prune [--expire=<time>]
'git backup-log' [--id=<id> | --path=<path>] update <path> <old-hash> <new-hash>
DESCRIPTION
-----------
Backup log records changes of certain files in the object database so
that if some file is overwritten by accident, you could still get the
original content back.
Backup log is enabled by setting core.backupLog to true and the
following commands will save backups:
- linkgit:git-add[1] keeps all index changes. File removal is not
recorded.
- linkgit:git-commit[1] keeps all index changes in `-a` or partial
commit mode.
- linkgit:git-apply[1] and linkgit:git-update-index[1] will keep
changes if `--keep-backup` is specified.
- Changes of `$GIT_DIR/config` made by `git config --edit` are kept.
- Deleted reflogs are kept. References from this deleted reflog will
not be kept at the next garbage collection though. This is mostly
meant to immediately undo an accidental branch deletion.
- linkgit:git-checkout[1] when switching branches and
linkgit:git-merge[1] will make a backup before overwriting
ignored files.
- linkgit:git-checkout[1] with `--force`, linkgit:git-reset[1] with
`--hard` or linkgit:git-am[1] and linkgit:git-rebase[1] with
`--skip` or `--abort` will make a backup before overwriting non
up-to-date files.
- FIXME perhaps `git checkout <paths>` only makes backups on
"precious" paths only?
Backups are split into three groups: changes made in the index, in the
working directory, or in `$GIT_DIR`. These can be selected with the `--id`
parameter as `index`, `worktree` and `gitdir` respectively.
Alternatively, the corresponding backup log files are `$GIT_DIR/index.bkl`,
`$GIT_DIR/worktree.bkl` and `$GIT_DIR/common/gitdir.bkl`, any of which can
be specified with `--path`.
This command is split into subcommands:
update::
Add a new change associated with `<path>` from `<old-hash>` to
`<new-hash>` to the selected backup log file. `<path>` must be
normalized.
log::
View the selected backup log (optionally filtered by pathspec).
By default, the diff of the change is shown.
cat::
Output the file content, or its object name, for a specific
change.
diff::
Output the diff of a specific change.
prune::
Prune the backup log, deleting entries that are older than a
specific time or that are invalid. The actual backup content is still in
the object database and may need to be pruned separately.
OPTIONS
-------
--id=<id>::
The name of the backup log file. Supported names are
`index`, `worktree` and `gitdir`.
--path=<path>::
The path of a backup log file.
--before::
Show the version before the change instead. This is most
useful when the change is a file deletion.
--hash::
Show the object name instead of content.
--expire=<time>::
The cutoff time for pruning backup log. The default is three
months ago.
<path>::
The path of the file where the change is made.
<old-hash>::
The blob hash of the content before the change.
<new-hash>::
The blob hash of the content after the change.
EXAMPLES
--------
Suppose you have carefully prepared your index with `git add -p` then
accidentally do
------------
$ git commit -am stuff
------------
You can see the old changes in the index with
------------
$ git backup-log --id=index log
------------
If you're only interested in specific paths
------------
$ git backup-log --id=index log -- <path> <path>...
------------
The changes should be close to the top. Once you have identified the
changes you want to recover, take note of the number in the "Change-Id"
line. You can get the file before "git commit -a" with
------------
$ git backup-log --id=index cat --before <change-id> <path>
------------
You could also get the whole diff (preferably with `-R` so that it's
applicable) with
------------
$ git backup-log --id=index diff -R <change-id>
------------
Similarly, if you accidentally do `git reset --hard` and want to
recover some files being overwritten by the command, start with
backup-log, but this time use the "worktree" log:
------------
$ git backup-log --id=worktree log
------------
The rest of the commands are the same as before.
If you deleted a branch by accident and recovered it, you could also
restore reflog, which is stored in "gitdir" backup log. You would need
to manually insert the reflog back in place (merge it with existing
one if needed):
------------
$ git backup-log --id=gitdir log
------------
GIT
---
Part of the linkgit:git[1] suite
......@@ -218,6 +218,9 @@ will remove the intended effect of the option.
the configured value will take effect next time the index is
read and this will remove the intended effect of the option.
--keep-backup::
Enable index backup log.
\--::
Do not interpret any more arguments as options.
......
......@@ -836,6 +836,7 @@ LIB_OBJS += archive-tar.o
LIB_OBJS += archive-zip.o
LIB_OBJS += argv-array.o
LIB_OBJS += attr.o
LIB_OBJS += backup-log.o
LIB_OBJS += base85.o
LIB_OBJS += bisect.o
LIB_OBJS += blame.o
......@@ -1038,6 +1039,7 @@ BUILTIN_OBJS += builtin/am.o
BUILTIN_OBJS += builtin/annotate.o
BUILTIN_OBJS += builtin/apply.o
BUILTIN_OBJS += builtin/archive.o
BUILTIN_OBJS += builtin/backup-log.o
BUILTIN_OBJS += builtin/bisect--helper.o
BUILTIN_OBJS += builtin/blame.o
BUILTIN_OBJS += builtin/branch.o
......
......@@ -21,6 +21,7 @@
#include "quote.h"
#include "rerere.h"
#include "apply.h"
#include "backup-log.h"
static void git_apply_config(void)
{
......@@ -230,6 +231,7 @@ struct patch {
char old_oid_prefix[GIT_MAX_HEXSZ + 1];
char new_oid_prefix[GIT_MAX_HEXSZ + 1];
struct patch *next;
struct object_id old_oid;
/* three-way fallback result */
struct object_id threeway_stage[3];
......@@ -4265,6 +4267,16 @@ static void patch_stats(struct apply_state *state, struct patch *patch)
static int remove_file(struct apply_state *state, struct patch *patch, int rmdir_empty)
{
if (state->update_index && !state->ita_only) {
if (state->backup_log) {
int pos = index_name_pos(state->repo->index,
patch->old_name,
strlen(patch->old_name));
if (pos >= 0)
oidcpy(&patch->old_oid,
&state->repo->index->cache[pos]->oid);
else
oidclr(&patch->old_oid);
}
if (remove_file_from_index(state->repo->index, patch->old_name) < 0)
return error(_("unable to remove %s from index"), patch->old_name);
}
......@@ -4280,7 +4292,8 @@ static int add_index_file(struct apply_state *state,
const char *path,
unsigned mode,
void *buf,
unsigned long size)
unsigned long size,
const struct object_id *old_oid)
{
struct stat st;
struct cache_entry *ce;
......@@ -4318,6 +4331,16 @@ static int add_index_file(struct apply_state *state,
"for newly created file %s"), path);
}
}
if (state->backup_log) {
struct strbuf *sb = state->repo->index->backup_log;
if (!sb) {
sb = xmalloc(sizeof(*sb));
strbuf_init(sb, 0);
state->repo->index->backup_log = sb;
}
bkl_append(sb, ce->name, old_oid, &ce->oid);
}
if (add_index_entry(state->repo->index, ce, ADD_CACHE_OK_TO_ADD) < 0) {
discard_cache_entry(ce);
return error(_("unable to add cache entry for %s"), path);
......@@ -4488,7 +4511,8 @@ static int create_file(struct apply_state *state, struct patch *patch)
if (patch->conflicted_threeway)
return add_conflicted_stages_file(state, patch);
else if (state->update_index)
return add_index_file(state, path, mode, buf, size);
return add_index_file(state, path, mode, buf, size,
&patch->old_oid);
return 0;
}
......@@ -4663,6 +4687,7 @@ static int apply_patch(struct apply_state *state,
struct patch *list = NULL, **listp = &list;
int skipped_patch = 0;
int res = 0;
int core_backup_log = 0;
state->patch_input_file = filename;
if (read_patch_file(&buf, fd) < 0)
......@@ -4726,6 +4751,13 @@ static int apply_patch(struct apply_state *state,
goto end;
}
if (state->backup_log &&
(!state->update_index ||
repo_config_get_bool(state->repo, "core.backupLog",
&core_backup_log) ||
!core_backup_log))
state->backup_log = 0;
if (state->check || state->apply) {
int r = check_patch_list(state, list);
if (r == -128) {
......@@ -4987,6 +5019,8 @@ int apply_parse_options(int argc, const char **argv,
N_("mark new files with `git add --intent-to-add`")),
OPT_BOOL(0, "cached", &state->cached,
N_("apply a patch without touching the working tree")),
OPT_BOOL(0, "keep-backup", &state->backup_log,
N_("log index changes if the feature is enabled")),
OPT_BOOL_F(0, "unsafe-paths", &state->unsafe_paths,
N_("accept a patch that touches outside the working area"),
PARSE_OPT_NOCOMPLETE),
......
......@@ -51,6 +51,7 @@ struct apply_state {
int check_index; /* preimage must match the indexed version */
int update_index; /* check_index && apply */
int ita_only; /* add intent-to-add entries to the index */
int backup_log; /* enable backup log */
/* These control cosmetic aspect of the output */
int diffstat; /* just show a diffstat, and don't actually apply */
......
#include "cache.h"
#include "backup-log.h"
#include "blob.h"
#include "lockfile.h"
#include "object-store.h"
#include "revision.h"
#include "strbuf.h"
#include "worktree.h"
/*
 * Append one backup log record to "output", describing the change of
 * "path" from blob "from" to blob "to".
 *
 * The record has the form
 *   <old-hex> SP <new-hex> SP <committer ident> TAB <path> LF
 *
 * Nothing is appended when both object names are equal, or when the
 * path contains a newline.
 */
void bkl_append(struct strbuf *output, const char *path,
		const struct object_id *from,
		const struct object_id *to)
{
	const char *old_hex, *new_hex;

	/*
	 * Records are LF-terminated, so a path with an embedded newline
	 * cannot be represented. Such paths are rare enough that they are
	 * simply skipped instead of breaking the line-based format.
	 */
	if (oideq(from, to) || strchr(path, '\n'))
		return;

	old_hex = oid_to_hex(from);
	new_hex = oid_to_hex(to);
	strbuf_addf(output, "%s %s %s\t%s\n",
		    old_hex, new_hex, git_committer_info(0), path);
}
/*
 * Append the contents of "new_log" to the file at "path", creating the
 * file if it does not exist. No locking is done here; that is left to
 * the caller.
 *
 * Returns 0 on success. On failure to open, write or close the file, the
 * problem is reported through error_errno() and its return value is
 * passed back.
 */
static int bkl_write_unlocked(const char *path, struct strbuf *new_log)
{
	int fd = open(path, O_CREAT | O_WRONLY | O_APPEND, 0666);

	if (fd == -1)
		return error_errno(_("unable to open %s"), path);

	if (write_in_full(fd, new_log->buf, new_log->len) < 0) {
		/*
		 * close() may overwrite errno; preserve the value set by
		 * the failed write so the right reason is reported.
		 */
		int saved_errno = errno;

		close(fd);
		errno = saved_errno;
		return error_errno(_("unable to update %s"), path);
	}

	/* A deferred write error may only surface when closing. */
	if (close(fd))
		return error_errno(_("unable to update %s"), path);
	return 0;
}
int bkl_write(const char *path, struct strbuf *new_log)
{
struct lock_file lk;
int ret;
ret = hold_lock_file_for_update(&lk, path, LOCK_REPORT_ON_ERROR);
if (ret == -1)
return -1;
ret = bkl_write_unlocked(path, new_log);
/*
* We do not write the the .lock file and append to the real one
* instead to reduce update cost. So we can't commit even in
* successful case.
*/
rollback_lock_file(&lk);
return ret;
}
/*
 * Parse one backup log line held in "sb" into "re".
 *
 * Expected layout of the line:
 *   old SP new SP name <email> SP time SP tz TAB path LF
 *
 * The buffer is edited in place: a NUL is written right after the '>'
 * closing the email and over the final LF, and "re->email" and
 * "re->path" are left pointing into sb->buf.
 *
 * Returns 0 on success, -1 if the line does not match the layout.
 */
int bkl_parse_entry(struct strbuf *sb, struct bkl_entry *re)
{
	char *ident_end, *rest;
	const char *cursor = sb->buf;

	/* a record is never empty and always ends with LF */
	if (!sb->len || sb->buf[sb->len - 1] != '\n')
		return -1; /* corrupt? */

	if (parse_oid_hex(cursor, &re->old_oid, &cursor) || *cursor++ != ' ')
		return -1; /* corrupt? */
	if (parse_oid_hex(cursor, &re->new_oid, &cursor) || *cursor++ != ' ')
		return -1; /* corrupt? */

	ident_end = strchr(cursor, '>');
	if (!ident_end || ident_end[1] != ' ')
		return -1; /* corrupt? */

	re->email = cursor;
	re->timestamp = parse_timestamp(ident_end + 2, &rest, 10);

	/* a zero timestamp is treated as a parse failure */
	if (!re->timestamp || !rest || rest[0] != ' ')
		return -1; /* corrupt? */
	/* timezone: a sign followed by exactly four digits */
	if (rest[1] != '+' && rest[1] != '-')
		return -1; /* corrupt? */
	if (!isdigit(rest[2]) || !isdigit(rest[3]) ||
	    !isdigit(rest[4]) || !isdigit(rest[5]))
		return -1; /* corrupt? */

	ident_end[1] = '\0';
	re->tz = strtol(rest + 1, NULL, 10);

	/* step over the timezone, and over the TAB separator when present */
	rest += (rest[6] == '\t') ? 7 : 6;

	sb->buf[sb->len - 1] = '\0'; /* no LF */
	re->path = rest;
	return 0;
}
/*
 * Walk backwards from "scan" (exclusive) towards "bob", the beginning
 * of the buffer, looking for a line feed.
 *
 * Returns a pointer to the closest LF found before "scan", i.e. the
 * terminator of the previous line, or "bob" itself when no LF is
 * found. The byte at "bob" may or may not be an LF in that case.
 */
static char *find_beginning_of_line(char *bob, char *scan)
{
	char *cursor = scan;

	while (cursor > bob) {
		cursor--;
		if (*cursor == '\n')
			break;
	}
	return cursor;
}
/*
 * Feed the lines of the backup log at "path" to "parse", last line
 * first.
 *
 * The file is read backwards in BUFSIZ-sized blocks. Each complete line
 * is handed to "parse" together with "data"; the line keeps its
 * terminating LF (the very last line only if the file ends with one).
 * Iteration stops as soon as "parse" returns non-zero, and that value
 * is returned.
 *
 * Returns 0 when the file does not exist (ENOENT/ENOTDIR) or when all
 * lines were processed, -1 if the file cannot be opened for another
 * reason, and the result of error_errno() on seek/tell/read failures.
 */
int bkl_parse_file_reverse(const char *path,
			   int (*parse)(struct strbuf *line, void *data),
			   void *data)
{
	struct strbuf sb = STRBUF_INIT;
	FILE *logfp;
	long pos;
	int ret = 0, at_tail = 1;

	logfp = fopen(path, "r");
	if (!logfp) {
		if (errno == ENOENT || errno == ENOTDIR)
			return 0;
		return -1;
	}

	/* Jump to the end */
	if (fseek(logfp, 0, SEEK_END) < 0)
		ret = error_errno(_("cannot seek back in %s"), path);
	pos = ftell(logfp);
	/*
	 * A failing ftell() returns -1; without this check the loop below
	 * would be skipped and the log silently treated as empty.
	 */
	if (!ret && pos < 0)
		ret = error_errno(_("cannot tell position in %s"), path);

	while (!ret && 0 < pos) {
		int cnt;
		size_t nread;
		char buf[BUFSIZ];
		char *endp, *scanp;

		/* Fill next block from the end */
		cnt = (sizeof(buf) < pos) ? sizeof(buf) : pos;
		if (fseek(logfp, pos - cnt, SEEK_SET)) {
			ret = error_errno(_("cannot seek back in %s"), path);
			break;
		}
		nread = fread(buf, cnt, 1, logfp);
		if (nread != 1) {
			ret = error_errno(_("cannot read %d bytes from %s"),
					  cnt, path);
			break;
		}
		pos -= cnt;

		scanp = endp = buf + cnt;
		if (at_tail && scanp[-1] == '\n')
			/* Looking at the final LF at the end of the file */
			scanp--;
		at_tail = 0;

		while (buf < scanp) {
			/*
			 * terminating LF of the previous line, or the beginning
			 * of the buffer.
			 */
			char *bp;

			bp = find_beginning_of_line(buf, scanp);

			if (*bp == '\n') {
				/*
				 * The newline is the end of the previous line,
				 * so we know we have complete line starting
				 * at (bp + 1). Prefix it onto any prior data
				 * we collected for the line and process it.
				 */
				strbuf_splice(&sb, 0, 0, bp + 1, endp - (bp + 1));
				scanp = bp;
				endp = bp + 1;
				ret = parse(&sb, data);
				strbuf_reset(&sb);
				if (ret)
					break;
			} else if (!pos) {
				/*
				 * We are at the start of the buffer, and the
				 * start of the file; there is no previous
				 * line, and we have everything for this one.
				 * Process it, and we can end the loop.
				 */
				strbuf_splice(&sb, 0, 0, buf, endp - buf);
				ret = parse(&sb, data);
				strbuf_reset(&sb);
				break;
			}

			if (bp == buf) {
				/*
				 * We are at the start of the buffer, and there
				 * is more file to read backwards. Which means
				 * we are in the middle of a line. Note that we
				 * may get here even if *bp was a newline; that
				 * just means we are at the exact end of the
				 * previous line, rather than some spot in the
				 * middle.
				 *
				 * Save away what we have to be combined with
				 * the data from the next read.
				 */
				strbuf_splice(&sb, 0, 0, buf, endp - buf);
				break;
			}
		}
	}
	if (!ret && sb.len)
		BUG("reverse backup log parser had leftover data");

	fclose(logfp);
	strbuf_release(&sb);
	return ret;
}
int bkl_parse_file(const char *path,
int (*parse)(struct strbuf *line, void *data),
void *data)
{
struct strbuf sb = STRBUF_INIT;
FILE *logfp;
int ret = 0;
logfp = fopen(path, "r");
if (!logfp) {
if (errno == ENOENT || errno == ENOTDIR)
return 0;
return -1;
}
while (!ret && !strbuf_getwholeline(&sb, logfp, '\n'))
ret = parse(&sb, data);
fclose(logfp);
strbuf_release(&sb);
return ret;
}
/* State shared between bkl_prune() and its per-line callback prune_parse(). */
struct prune_options {
	/* repository used to check that the blobs named by an entry exist */
	struct repository *repo;
	/* stream of the lock file; entries that survive pruning are written here */
	FILE *fp;
	/* cutoff: entries with a timestamp before this are dropped */
	timestamp_t expire;
	/*
	 * Untouched copy of the line being examined; needed because
	 * bkl_parse_entry() modifies the line it parses.
	 */
	struct strbuf copy;
};
/*
 * Return non-zero only if "oid" is not the null object name and
 * oid_object_info() reports it as a blob in repository "r".
 */
static int good_oid(struct repository *r, const struct object_id *oid)
{
	return !is_null_oid(oid) &&
	       oid_object_info(r, oid, NULL) == OBJ_BLOB;
}
/*
 * Per-line callback for bkl_prune(); "data" is a struct prune_options.
 *
 * Decides whether the backup log entry in "line" is kept. A kept entry
 * is written verbatim to opts->fp. An entry is dropped when:
 *  - its timestamp is before opts->expire,
 *  - its old and new object names are identical, or
 *  - one of its non-null object names is not a blob known to the
 *    repository.
 *
 * A null object name on one side is not a reason to drop the entry:
 * bkl_append() records such entries whenever one side of the change has
 * no blob, and good_oid() rejects the null object name, so testing it
 * unconditionally would discard all of those entries regardless of age.
 *
 * Returns 0 to continue with the next line, -1 if the line cannot be
 * parsed or the entry cannot be written out.
 */
static int prune_parse(struct strbuf *line, void *data)
{
	struct prune_options *opts = data;
	struct bkl_entry entry;

	/* bkl_parse_entry() edits the line in place; keep the raw text */
	strbuf_reset(&opts->copy);
	strbuf_addbuf(&opts->copy, line);

	if (bkl_parse_entry(line, &entry))
		return -1;

	if (entry.timestamp < opts->expire)
		return 0;

	if (oideq(&entry.old_oid, &entry.new_oid))
		return 0;

	if ((!is_null_oid(&entry.old_oid) &&
	     !good_oid(opts->repo, &entry.old_oid)) ||
	    (!is_null_oid(&entry.new_oid) &&
	     !good_oid(opts->repo, &entry.new_oid)))
		return 0;

	if (!opts->fp)
		return -1;

	/* do not let a failed write pass for a successfully kept entry */
	if (fputs(opts->copy.buf, opts->fp) == EOF)
		return -1;
	return 0;
}
/*
 * Rewrite the backup log at "path", keeping only the entries that
 * prune_parse() accepts for the cutoff "expire".
 *
 * The surviving entries are written to the lock file for "path", which
 * is committed over the original unless parsing fails, in which case
 * it is rolled back.
 *
 * Returns 0 if the lock cannot be taken because of ENOENT/ENOTDIR, the
 * result of error() for other locking failures, a negative value when
 * parsing fails, and otherwise the result of commit_lock_file().
 */
int bkl_prune(struct repository *r, const char *path, timestamp_t expire)
{
	struct prune_options state;
	struct lock_file lock;
	int status;

	if (hold_lock_file_for_update(&lock, path, 0) == -1) {
		if (errno == ENOENT || errno == ENOTDIR)
			return 0;
		return error(_("failed to lock '%s'"), path);
	}

	strbuf_init(&state.copy, 0);
	state.repo = r;
	state.expire = expire;
	state.fp = fdopen_lock_file(&lock, "w");

	status = bkl_parse_file(path, prune_parse, &state);
	if (status >= 0)
		status = commit_lock_file(&lock);
	else
		rollback_lock_file(&lock);

	strbuf_release(&state.copy);
	return status;
}
/*
 * Prune the per-worktree backup log called "name" that belongs to
 * worktree "wt", dying on failure. The message names the log and, when
 * the worktree has an id, the worktree as well.
 */
static void prune_worktree_log_or_die(struct repository *r,
				      struct worktree *wt,
				      const char *name,
				      timestamp_t expire)
{
	/*
	 * Work on a private copy of the path, like
	 * add_backup_logs_to_pending() does: the string returned by
	 * worktree_git_path() is not ours, and presumably may be
	 * overwritten by path helpers called while pruning.
	 */
	char *path = xstrdup(worktree_git_path(wt, "%s", name));

	if (bkl_prune(r, path, expire)) {
		if (wt->id)
			die(_("failed to prune %s on working tree '%s'"),
			    name, wt->id);
		else
			die(_("failed to prune %s"), name);
	}
	free(path);
}

/*
 * Prune every backup log with the cutoff "expire": the shared
 * "common/gitdir.bkl" of repository "r", then "index.bkl" and
 * "worktree.bkl" of each worktree returned by get_worktrees().
 *
 * Dies on the first log that cannot be pruned.
 */
void bkl_prune_all_or_die(struct repository *r, timestamp_t expire)
{
	struct worktree **worktrees, **p;
	char *bkl_path;

	bkl_path = repo_git_path(r, "common/gitdir.bkl");
	if (bkl_prune(r, bkl_path, expire))
		die(_("failed to prune %s"), "gitdir.bkl");
	free(bkl_path);

	worktrees = get_worktrees(0);
	for (p = worktrees; *p; p++) {
		prune_worktree_log_or_die(r, *p, "index.bkl", expire);
		prune_worktree_log_or_die(r, *p, "worktree.bkl", expire);
	}
	free_worktrees(worktrees);
}
/* Callback data for add_pending() and add_blob_to_pending(). */
struct pending_cb {
	/* revision walk whose pending list receives the blobs */
	struct rev_info *revs;
	/* bits OR-ed into the object flags of every blob that is added */
	unsigned flags;
};
static void add_blob_to_pending(const struct object_id *oid,
const char *path,
struct pending_cb *cb)
{
struct blob *blob;
if (!good_oid(cb->revs->repo, oid))
return;
blob = lookup_blob(cb->revs->repo, oid);
blob->object.flags |= cb->flags;
add_pending_object(cb->revs, &blob->object, path);
}
static int add_pending(struct strbuf *line, void *cb)
{
struct bkl_entry entry;
if (bkl_parse_entry(line, &entry))
return -1;
add_blob_to_pending(&entry.old_oid, entry.path, cb);
add_blob_to_pending(&entry.new_oid, entry.path, cb);
return 0;
}
/*
 * Queue the blobs referenced by every entry of the backup log at "path"
 * as pending objects, using add_pending() as the per-line callback.
 *
 * The result of bkl_parse_file() is ignored, so a missing file, an
 * unreadable file or an unparsable line is not reported to the caller.
 */
static void add_backup_log_to_pending(const char *path, struct pending_cb *cb)
{
	bkl_parse_file(path, add_pending, cb);
}
void add_backup_logs_to_pending(struct rev_info *revs, unsigned flags)
{
struct worktree **worktrees, **p;
char *path;
struct pending_cb cb;
cb.revs = revs;
cb.flags = flags;
worktrees = get_worktrees(0);
for (p = worktrees; *p; p++) {
struct worktree *wt = *p;
path = xstrdup(worktree_git_path(wt, "index.bkl"));
add_backup_log_to_pending(path, &cb);
free(path);
path = xstrdup(worktree_git_path(wt, "worktree.bkl"));
add_backup_log_to_pending(path, &cb);
free(path);
}
free_worktrees(worktrees);
path = git_pathdup("common/gitdir.bkl");
add_backup_log_to_pending(path, &cb);
free(path);
}
#ifndef __BACKUP_LOG_H__
#define __BACKUP_LOG_H__
#include "cache.h"
struct repository;
struct rev_info;
struct strbuf;
struct bkl_entry