Commit 06f0f40d authored by gerd's avatar gerd

updates


git-svn-id: https://gps.dynxs.de/private/svn/app-plasma/[email protected] 55289a75-7b90-4627-9e07-ffb4263930b2
parent ec86f05f
......@@ -18,10 +18,10 @@ for mapred demo
8. plasma_client: make buffer-backed reads manage transactions
automatically - DONE
9. local block access - DONE
10. Plasma_client: fix error reporting for flush errors
10. Plasma_client: fix error reporting for flush errors - DONE
11. mutex bug - DONE
12. map task generation bug - DONE
13. allow bigger blocksizes
13. allow bigger blocksizes - DONE
[Sun Jun 6 00:40:42 2010] [Nn_manager] [err] Error in commit_transaction: File
"nn_state.ml", line 571, characters 3-9: Assertion failed
......@@ -63,6 +63,8 @@ How comes that blockmap gets out of sync with inodeblocks?
BUG "Cannot sync as fast as configured" at namenode startup
Plasma_client/other clients: configurable retry constant
rename directories
documentation for release:
......@@ -144,7 +146,7 @@ Mapred:
128 blocks a new one. Change that so these segments are appended
to the current map task if this is not yet full - DONE
- Mapred_sched: the 128 should be configurable
- Mapred_sched: the 128 should be configurable - DONE
- Mapred_main: plasmamr_task* temp file is not deleted
......@@ -152,11 +154,9 @@ Mapred:
- record reader : do not keep the transaction open all the time - DONE
- improvement: the reducer could also do the task of the last shuffle
- better config: size of map task (instead of number of map tasks)
- improvement: the reducer could also do the task of the last shuffle - DONE
- Sort: automatically use a disk sort when the input is too large
- Sort: automatically use a disk sort when the input is too large - DONE
------------
......@@ -342,6 +342,17 @@ What should be changed in ocamlnet?
- symlink resolution
- set mtime at link/unlink time for the dir inode - DONE
- explicitly store the next inode number to allocate in the database
- allocation algorithm: the "relative balancing criterion" behaves badly
when new nodes are added - the new nodes would receive practically
all of the load
maybe better:
* 50% of the blocks are allocated equally over all nodes
* 50% of the blocks are allocated by the relative balancing criterion
- Plasma_client.ic_evict: the eviction criterion is arbitrary and too
low for many applications
Hard problem:
Plasma_client.write cannot return `enospc because this error is first
......
......@@ -282,6 +282,7 @@ let add_inputs plan mj =
pc.task_servers_ip;
let trans = Plasma_client.start c in
let blocksize = Plasma_client.blocksize c in
let files = Plasma_client.list trans mj#input_dir in
let files_ii =
List.map
......@@ -337,11 +338,15 @@ let add_inputs plan mj =
*)
in
(** analyze in groups of as many blocks as fit in 64M *)
let sched_limit = 64 * 1024 * 1024 in
let sched_size = max 1 (sched_limit / blocksize) in
let sched_sizeL = Int64.of_int sched_size in
List.iter
(fun (filename,inode,ii) ->
(** analyze in groups of 128 blocks *)
let n_groupsL =
Int64.succ (Int64.div (Int64.pred ii.blocklimit) 128L) in
Int64.succ (Int64.div (Int64.pred ii.blocklimit) sched_sizeL) in
if n_groupsL > Int64.of_int max_int then
failwith "Mapred_sched: input file has too many blocks";
(* well, that limit is really high, on 32 bit systems:
......@@ -353,8 +358,8 @@ let add_inputs plan mj =
let n_groups = Int64.to_int n_groupsL in
for g = 0 to n_groups - 1 do
(** Retrieve the blocklist (EXPENSIVE) *)
let pos0 = Int64.mul (Int64.of_int g) 128L in
let pos1 = min (Int64.add pos0 128L) ii.blocklimit in
let pos0 = Int64.mul (Int64.of_int g) sched_sizeL in
let pos1 = min (Int64.add pos0 sched_sizeL) ii.blocklimit in
let lenL = Int64.sub pos1 pos0 in
let len = Int64.to_int lenL in
let blocks = Plasma_client.get_blocklist trans inode pos0 len in
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment